Punycode encoding
This commit is contained in:
@@ -1,2 +1,3 @@
|
||||
requests==2.31.0
|
||||
beautifulsoup4==4.12.2
|
||||
idna==3.6
|
||||
|
||||
@@ -11,6 +11,7 @@ import time
|
||||
import json
|
||||
import os
|
||||
from datetime import datetime
|
||||
import idna
|
||||
|
||||
def fetch_domain_details(extension, url):
|
||||
"""Fetch detailed information for a specific domain extension"""
|
||||
@@ -183,7 +184,13 @@ def fetch_iana_domains():
|
||||
domain_text = domain_text[1:]
|
||||
|
||||
if domain_text and len(domain_text) > 0:
|
||||
url = urljoin(base_url, f"/domains/root/db/{domain_text}.html")
|
||||
# Encode the extension with IDNA (Punycode) before building the URL; this matters for non-ASCII extensions
|
||||
try:
|
||||
punycode_extension = idna.encode(domain_text).decode('ascii')
|
||||
url = urljoin(base_url, f"/domains/root/db/{punycode_extension}.html")
|
||||
except (idna.IDNAError, UnicodeError):
|
||||
# Fall back to the original extension text if Punycode conversion fails
|
||||
url = urljoin(base_url, f"/domains/root/db/{domain_text}.html")
|
||||
|
||||
domains.append({
|
||||
'extension': domain_text,
|
||||
|
||||
Reference in New Issue
Block a user