120 lines
4.6 KiB
Python
120 lines
4.6 KiB
Python
#!/usr/bin/env python3
|
||
"""
|
||
Script to map Punycode TLDs to their Unicode representations
|
||
"""
|
||
|
||
import idna
|
||
|
||
# Hand-maintained table of IDN TLDs: Punycode A-label -> display string.
# Values carry a leading dot. punycode_to_unicode() consults this table only
# when idna.decode() fails for a label.
#
# Each key appears exactly once: in a dict literal a repeated key silently
# overwrites the earlier entry.
#
# NOTE(review): several values do not look like the real decodings of their
# keys (for example `xn--d1alf` and `xn--80adxhks` look swapped) -- verify
# every entry against idna.decode() before relying on this table.
IDN_MAPPINGS = {
    'xn--p1ai': '.рф',  # Russia
    'xn--fiqs8s': '.中国',  # China
    'xn--fiqz9s': '.中國',  # China (traditional)
    'xn--lgbbat1ad8j': '.الجزائر',  # Algeria
    'xn--yfro4i67o': '.קום',  # Israel (KOM)
    'xn--4gbrim': '.مصر',  # Egypt
    'xn--55qx5d': '.موريتانيا',  # Mauritania
    'xn--80akhbyknj4f': '.հայ',  # Armenia
    'xn--80asehdb': '.бел',  # Belarus
    'xn--90a3ac': '.мкд',  # Macedonia
    'xn--45brj9c': '.бг',  # Bulgaria
    'xn--hlcj6aya': '.سوريا',  # Syria
    'xn--mgbcpq6gpa1a': '.السعودية',  # Saudi Arabia
    'xn--ogbpf8fl': '.سودان',  # Sudan
    'xn--kprw13d': '.გე',  # Georgia
    'xn--kpry57d': '.გე',  # Georgia (alternative)
    'xn--o1ac': '.ελ',  # Greece
    'xn--80ao21a': '.қаз',  # Kazakhstan
    'xn--fgbp6a': '.مغرب',  # Morocco
    'xn--j1amh': '.укр',  # Ukraine
    'xn--mix891f': '.ไทย',  # Thailand
    'xn--mix082f': '.ไทย',  # Thailand (alternative)
    'xn--mxtq1m': '.新加坡',  # Singapore
    'xn--node': '.नेट',  # India (NET)
    'xn--j6w193g': '.香港',  # Hong Kong
    'xn--55qw42g': '.中国',  # China (alternative)
    'xn--5tzm5g': '.台灣',  # Taiwan
    'xn--6frz82g': '.ලංකා',  # Sri Lanka
    'xn--80adxhks': '.мкд',  # Macedonia (alternative)
    'xn--l1acc': '.мон',  # Mongolia
    'xn--9t4b11yi5a': '.இலங்கை',  # Sri Lanka (alternative)
    'xn--rhqv96g': '.世博',  # Expo
    'xn--0zwm56d': '.澳洲',  # Australia
    'xn--czru2d': '.कोम',  # India (COM)
    'xn--d1acj3b': '.дети',  # Kids
    'xn--d1alf': '.москва',  # Moscow
    'xn--h2brj9c': '.срб',  # Serbia
    'xn--h2breg3eve': '.срб',  # Serbia (alternative)
    'xn--k1x57d': '.新加坡',  # Singapore (alternative)
    'xn--mgbbh1a71e': '.امارات',  # UAE
    'xn--mgbaam7a8h': '.الاردن',  # Jordan
    'xn--mgbayh7gpa': '.الاردن',  # Jordan (alternative)
    'xn--y9a3aq': '.հայ',  # Armenia (alternative)
    'xn--mgbx4cd0ab': '.مليسيا',  # Malaysia
    'xn--54b7fta0cc': '.بھارت',  # India
    'xn--90ae5b': '.بازار',  # Iran (Bazaar)
    'xn--l1nej': '.موقع',  # Iran (Site)
    'xn--mgbgu82a': '.شبكة',  # Iran (Network)
    'xn--fiq64b': '.कॉम',  # India (COM alternative)
    'xn--kcrx77d1x4a': '.சிங்கப்பூர்',  # Singapore (Tamil)
    'xn--i1b6b1a6a2e': '.संगठन',  # India (Organization)
    'xn--nqv7f': '.فلسطين',  # Palestine
    'xn--qqh11a': '.مصر',  # Egypt (alternative)
    'xn--c1avg': '.бел',  # Belarus (alternative)
    'xn--e1a4c': '.ею',  # European Union
    'xn--8h0a': '.ايران',  # Iran
    'xn--1qqw23a': '.游戏',  # China (Game)
    'xn--3bst00m': '.公司',  # China (Company)
    'xn--45br5cyl': '.бг',  # Bulgaria (alternative)
    'xn--s9brj9c': '.срб',  # Serbia (alternative)
    'xn--czrs0t': '.कोम',  # India (COM alternative)
    'xn--czr694b': '.कॉम',  # India (COM alternative)
    'xn--gecrj9c': '.克罗地亚',  # Croatia
    'xn--9krt00a': '.日本',  # Japan
    'xn--xkc2dl3a5ee0h': '.ಭಾರತ',  # India (Kannada)
    'xn--fzys8d69uvgm': '.تونس',  # Tunisia
    'xn--fzc2c9e2c': '.السعودية',  # Saudi Arabia (alternative)
}
|
||
|
||
def punycode_to_unicode(punycode):
    """Convert a Punycode (A-label) TLD to its Unicode representation.

    Args:
        punycode: TLD label such as ``'xn--p1ai'``. A label that does not
            start with ``'xn--'`` is returned unchanged.

    Returns:
        The value returned by ``idna.decode`` when decoding succeeds. When
        decoding fails, the ``IDN_MAPPINGS`` entry for the label (those
        values carry a leading dot, unlike ``idna.decode`` results), or the
        input itself if the label is not in the table.
    """
    if not punycode.startswith('xn--'):
        return punycode

    try:
        return idna.decode(punycode)
    except UnicodeError:
        # idna.IDNAError derives from UnicodeError, so this covers every
        # decoding failure without also swallowing KeyboardInterrupt,
        # SystemExit or unrelated programming errors as a bare `except:` did.
        return IDN_MAPPINGS.get(punycode, punycode)
|
||
|
||
def get_all_idn_tlds(
    url='https://data.iana.org/TLD/tlds-alpha-by-domain.txt',
    timeout=30,
):
    """Download a TLD list and return its IDN entries with Unicode forms.

    Args:
        url: Location of a plain-text TLD list, one TLD per line. Defaults
            to the IANA list this script has always used.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block indefinitely.

    Returns:
        A list of dicts, in the order the TLDs appear in the downloaded
        text, each with the keys ``'punycode'`` (lower-cased A-label),
        ``'unicode'`` (result of ``punycode_to_unicode``) and ``'display'``
        (``"<punycode> (<unicode>)"``).

    Raises:
        requests.RequestException: If the download fails, times out, or the
            server answers with an HTTP error status.
    """
    import requests

    response = requests.get(url, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of parsing an error page as a TLD list.
    response.raise_for_status()

    idn_tlds = []
    for raw_line in response.text.splitlines():
        tld = raw_line.strip().lower()
        # Blank lines and '#' comment lines never start with 'xn--', so this
        # single check also filters them out.
        if not tld.startswith('xn--'):
            continue
        unicode_form = punycode_to_unicode(tld)
        idn_tlds.append({
            'punycode': tld,
            'unicode': unicode_form,
            'display': f"{tld} ({unicode_form})",
        })

    return idn_tlds
|
||
|
||
if __name__ == "__main__":
    # Script entry point: fetch the IDN TLDs, report how many were found,
    # then print the first 20 display strings as a sample.
    results = get_all_idn_tlds()
    print(f"Found {len(results)} IDN TLDs:")
    preview = results[:20]
    for entry in preview:
        print(f" {entry['display']}")
|