Files
get-domain-suffix-iana/idn_mappings.py
2026-03-11 23:08:57 +05:30

120 lines
4.6 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
"""
Script to map Punycode TLDs to their Unicode representations
"""
import idna
# Known IDN TLD mappings: Punycode A-label -> Unicode display form.
#
# Every value is stored WITH a leading dot (e.g. '.рф'), and each key appears
# exactly once.  The trailing comment on each entry names the region or
# meaning the Unicode value is meant to represent.
#
# NOTE(review): several values do not appear to be the decoded form of their
# key (for example 'xn--node' and 'xn--90a3ac' look like they decode to other
# labels than the ones listed here).  The values are kept as they were;
# verify each entry against idna.decode() before relying on this table.
IDN_MAPPINGS = {
    'xn--p1ai': '.рф',  # Russia
    'xn--fiqs8s': '.中国',  # China
    'xn--fiqz9s': '.中國',  # China (traditional)
    'xn--lgbbat1ad8j': '.الجزائر',  # Algeria
    'xn--yfro4i67o': '.קום',  # Israel (KOM)
    'xn--4gbrim': '.مصر',  # Egypt
    'xn--55qx5d': '.موريتانيا',  # Mauritania
    'xn--80akhbyknj4f': '.հայ',  # Armenia
    'xn--80asehdb': '.бел',  # Belarus
    'xn--90a3ac': '.мкд',  # Macedonia
    'xn--45brj9c': '.бг',  # Bulgaria
    'xn--hlcj6aya': '.سوريا',  # Syria
    'xn--mgbcpq6gpa1a': '.السعودية',  # Saudi Arabia
    'xn--ogbpf8fl': '.سودان',  # Sudan
    'xn--kprw13d': '.გე',  # Georgia
    'xn--kpry57d': '.გე',  # Georgia (alternative)
    'xn--o1ac': '.ελ',  # Greece
    'xn--80ao21a': '.қаз',  # Kazakhstan
    'xn--fgbp6a': '.مغرب',  # Morocco
    'xn--j1amh': '.укр',  # Ukraine
    'xn--mix891f': '.ไทย',  # Thailand
    'xn--mix082f': '.ไทย',  # Thailand (alternative)
    'xn--mxtq1m': '.新加坡',  # Singapore
    'xn--node': '.नेट',  # India (NET)
    'xn--j6w193g': '.香港',  # Hong Kong
    'xn--55qw42g': '.中国',  # China (alternative)
    'xn--5tzm5g': '.台灣',  # Taiwan
    'xn--6frz82g': '.ලංකා',  # Sri Lanka
    'xn--80adxhks': '.мкд',  # Macedonia (alternative)
    'xn--l1acc': '.мон',  # Mongolia
    'xn--9t4b11yi5a': '.இலங்கை',  # Sri Lanka (alternative)
    'xn--rhqv96g': '.世博',  # Expo
    'xn--0zwm56d': '.澳洲',  # Australia
    'xn--czru2d': '.कोम',  # India (COM)
    'xn--d1acj3b': '.дети',  # Kids
    'xn--d1alf': '.москва',  # Moscow
    'xn--h2brj9c': '.срб',  # Serbia
    'xn--h2breg3eve': '.срб',  # Serbia (alternative)
    'xn--k1x57d': '.新加坡',  # Singapore (alternative)
    'xn--mgbbh1a71e': '.امارات',  # UAE
    'xn--mgbaam7a8h': '.الاردن',  # Jordan
    'xn--mgbayh7gpa': '.الاردن',  # Jordan (alternative)
    'xn--y9a3aq': '.հայ',  # Armenia (alternative)
    'xn--mgbx4cd0ab': '.مليسيا',  # Malaysia
    'xn--54b7fta0cc': '.بھارت',  # India
    'xn--90ae5b': '.بازار',  # Iran (Bazaar)
    'xn--l1nej': '.موقع',  # Iran (Site)
    'xn--mgbgu82a': '.شبكة',  # Iran (Network)
    'xn--fiq64b': '.कॉम',  # India (COM alternative)
    'xn--kcrx77d1x4a': '.சிங்கப்பூர்',  # Singapore (Tamil)
    'xn--i1b6b1a6a2e': '.संगठन',  # India (Organization)
    'xn--nqv7f': '.فلسطين',  # Palestine
    'xn--qqh11a': '.مصر',  # Egypt (alternative)
    'xn--c1avg': '.бел',  # Belarus (alternative)
    'xn--e1a4c': '.ею',  # European Union
    'xn--8h0a': '.ايران',  # Iran
    'xn--1qqw23a': '.游戏',  # China (Game)
    'xn--3bst00m': '.公司',  # China (Company)
    'xn--45br5cyl': '.бг',  # Bulgaria (alternative)
    'xn--s9brj9c': '.срб',  # Serbia (alternative)
    'xn--czrs0t': '.कोम',  # India (COM alternative)
    'xn--czr694b': '.कॉम',  # India (COM alternative)
    'xn--gecrj9c': '.克罗地亚',  # Croatia
    'xn--9krt00a': '.日本',  # Japan
    'xn--xkc2dl3a5ee0h': '.ಭಾರತ',  # India (Kannada)
    'xn--fzys8d69uvgm': '.تونس',  # Tunisia
    'xn--fzc2c9e2c': '.السعودية',  # Saudi Arabia (alternative)
}
def punycode_to_unicode(punycode):
    """Convert a Punycode label to its Unicode representation.

    Args:
        punycode: A domain label as a string, e.g. ``'xn--p1ai'``.

    Returns:
        * ``punycode`` unchanged when it does not start with ``'xn--'``
          (the check is case-sensitive).
        * The result of ``idna.decode(punycode)`` when decoding succeeds.
        * Otherwise the ``IDN_MAPPINGS`` entry for ``punycode``, or
          ``punycode`` itself when the table has no entry.  Note that the
          table values start with a dot, so this fallback is formatted
          differently from a successful decode.
    """
    if not punycode.startswith('xn--'):
        return punycode

    try:
        return idna.decode(punycode)
    except UnicodeError:
        # idna.IDNAError derives from UnicodeError, so this covers every
        # decoding failure without also swallowing KeyboardInterrupt,
        # SystemExit or unrelated programming errors like a bare ``except``.
        return IDN_MAPPINGS.get(punycode, punycode)
def get_all_idn_tlds(
    url='https://data.iana.org/TLD/tlds-alpha-by-domain.txt',
    timeout=30,
):
    """Get all IDN TLDs with their Unicode representations.

    Downloads a TLD list, keeps the entries that start with ``'xn--'``
    (after stripping whitespace and lower-casing) and decodes each one with
    ``punycode_to_unicode``.

    Args:
        url: Location of the TLD list; defaults to the IANA
            ``tlds-alpha-by-domain.txt`` file.
        timeout: Timeout in seconds passed to ``requests.get`` so a stalled
            connection cannot block forever.

    Returns:
        A list of dicts, in file order, each with the keys ``'punycode'``
        (the lower-cased label), ``'unicode'`` (its decoded form) and
        ``'display'`` (``"<punycode> (<unicode>)"``).

    Raises:
        requests.RequestException: If the download fails, times out, or the
            server answers with an HTTP error status.
    """
    # Local import, as in the original: requests is only needed when this
    # function actually runs.
    import requests

    response = requests.get(url, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of parsing an error page as TLD data.
    response.raise_for_status()

    idn_tlds = []
    for raw_line in response.text.splitlines():
        tld = raw_line.strip().lower()
        # Blank lines and '#' comment lines never start with 'xn--', so this
        # single check also filters them out.
        if not tld.startswith('xn--'):
            continue
        unicode_form = punycode_to_unicode(tld)
        idn_tlds.append({
            'punycode': tld,
            'unicode': unicode_form,
            'display': f"{tld} ({unicode_form})",
        })
    return idn_tlds
if __name__ == "__main__":
    # Script entry point: fetch the IDN TLDs and print a short preview.
    results = get_all_idn_tlds()
    print(f"Found {len(results)} IDN TLDs:")
    # Only the first 20 entries are shown.
    preview = results[:20]
    for entry in preview:
        print(f" {entry['display']}")