Punycode encoding

This commit is contained in:
Kar
2026-03-10 13:33:52 +05:30
parent 760aaae6c5
commit 7cdf0f8358
2 changed files with 9 additions and 1 deletions

View File

@@ -1,2 +1,3 @@
requests==2.31.0
beautifulsoup4==4.12.2
idna==3.6

View File

@@ -11,6 +11,7 @@ import time
import json
import os
from datetime import datetime
import idna
def fetch_domain_details(extension, url):
"""Fetch detailed information for a specific domain extension"""
@@ -183,7 +184,13 @@ def fetch_iana_domains():
domain_text = domain_text[1:]
if domain_text and len(domain_text) > 0:
url = urljoin(base_url, f"/domains/root/db/{domain_text}.html")
# Encode the extension as Punycode so the URL path is ASCII-only
try:
punycode_extension = idna.encode(domain_text).decode('ascii')
url = urljoin(base_url, f"/domains/root/db/{punycode_extension}.html")
except (idna.IDNAError, UnicodeError):
# Fall back to the original extension if Punycode conversion fails
url = urljoin(base_url, f"/domains/root/db/{domain_text}.html")
domains.append({
'extension': domain_text,