diff --git a/requirements.txt b/requirements.txt index f14b614..fdf6caa 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,2 +1,3 @@ requests==2.31.0 beautifulsoup4==4.12.2 +idna==3.6 diff --git a/save_iana_domains.py b/save_iana_domains.py index cff6fb7..b9c19e5 100644 --- a/save_iana_domains.py +++ b/save_iana_domains.py @@ -11,6 +11,7 @@ import time import json import os from datetime import datetime +import idna def fetch_domain_details(extension, url): """Fetch detailed information for a specific domain extension""" @@ -183,7 +184,13 @@ def fetch_iana_domains(): domain_text = domain_text[1:] if domain_text and len(domain_text) > 0: - url = urljoin(base_url, f"/domains/root/db/{domain_text}.html") + # Convert to Punycode for URL if it contains non-ASCII characters + try: + punycode_extension = idna.encode(domain_text).decode('ascii') + url = urljoin(base_url, f"/domains/root/db/{punycode_extension}.html") + except (idna.IDNAError, UnicodeError): + # Fallback to original if Punycode conversion fails + url = urljoin(base_url, f"/domains/root/db/{domain_text}.html") domains.append({ 'extension': domain_text,