#!/usr/bin/env python3
"""Map Punycode (``xn--``) top-level domains to their Unicode representations.

The list of delegated TLDs is downloaded from IANA; each IDN entry is decoded
with the ``idna`` package, falling back to a small hand-maintained table when
decoding fails.
"""

import idna

# Source of the authoritative TLD list and how long to wait for it.
_IANA_TLD_LIST_URL = 'https://data.iana.org/TLD/tlds-alpha-by-domain.txt'
_REQUEST_TIMEOUT_SECONDS = 30

# Known IDN TLD mappings, consulted only when ``idna.decode`` rejects a label.
#
# Values carry a leading dot, whereas labels decoded by ``idna`` do not.
#
# NOTE(review): several distinct labels below map to the same Unicode string
# (e.g. 'xn--80asehdb' and 'xn--c1avg' both give '.бел').  Distinct Punycode
# labels cannot decode to the same text, so some of these entries must be
# wrong -- verify each one against ``idna.decode`` before relying on it.
IDN_MAPPINGS = {
    'xn--p1ai': '.рф',  # Russia
    'xn--fiqs8s': '.中国',  # China
    'xn--fiqz9s': '.中國',  # China (traditional)
    'xn--lgbbat1ad8j': '.الجزائر',  # Algeria
    'xn--yfro4i67o': '.קום',  # Israel (KOM)
    'xn--4gbrim': '.مصر',  # Egypt
    'xn--55qx5d': '.موريتانيا',  # Mauritania
    'xn--80akhbyknj4f': '.հայ',  # Armenia
    'xn--80asehdb': '.бел',  # Belarus
    'xn--90a3ac': '.мкд',  # Macedonia
    'xn--45brj9c': '.бг',  # Bulgaria
    'xn--hlcj6aya': '.سوريا',  # Syria
    'xn--mgbcpq6gpa1a': '.السعودية',  # Saudi Arabia
    'xn--ogbpf8fl': '.سودان',  # Sudan
    'xn--kprw13d': '.გე',  # Georgia
    'xn--kpry57d': '.გე',  # Georgia (alternative)
    'xn--o1ac': '.ελ',  # Greece
    'xn--80ao21a': '.қаз',  # Kazakhstan
    'xn--fgbp6a': '.مغرب',  # Morocco
    'xn--j1amh': '.укр',  # Ukraine
    'xn--mix891f': '.ไทย',  # Thailand
    'xn--mix082f': '.ไทย',  # Thailand (alternative)
    'xn--mxtq1m': '.新加坡',  # Singapore
    'xn--node': '.नेट',  # India (NET)
    'xn--j6w193g': '.香港',  # Hong Kong
    'xn--55qw42g': '.中国',  # China (alternative)
    'xn--5tzm5g': '.台灣',  # Taiwan
    'xn--6frz82g': '.ලංකා',  # Sri Lanka
    'xn--80adxhks': '.мкд',  # Macedonia (alternative)
    'xn--l1acc': '.мон',  # Mongolia
    'xn--9t4b11yi5a': '.இலங்கை',  # Sri Lanka (alternative)
    'xn--rhqv96g': '.世博',  # Expo
    'xn--0zwm56d': '.澳洲',  # Australia
    'xn--czru2d': '.कोम',  # India (COM)
    'xn--d1acj3b': '.дети',  # Kids
    'xn--d1alf': '.москва',  # Moscow
    'xn--h2brj9c': '.срб',  # Serbia
    'xn--h2breg3eve': '.срб',  # Serbia (alternative)
    'xn--k1x57d': '.新加坡',  # Singapore (alternative)
    'xn--mgbbh1a71e': '.امارات',  # UAE
    'xn--mgbaam7a8h': '.الاردن',  # Jordan
    'xn--mgbayh7gpa': '.الاردن',  # Jordan (alternative)
    'xn--y9a3aq': '.հայ',  # Armenia (alternative)
    'xn--mgbx4cd0ab': '.مليسيا',  # Malaysia
    'xn--54b7fta0cc': '.بھارت',  # India
    'xn--90ae5b': '.بازار',  # Iran (Bazaar)
    'xn--l1nej': '.موقع',  # Iran (Site)
    'xn--mgbgu82a': '.شبكة',  # Iran (Network)
    'xn--fiq64b': '.कॉम',  # India (COM alternative)
    'xn--kcrx77d1x4a': '.சிங்கப்பூர்',  # Singapore (Tamil)
    'xn--i1b6b1a6a2e': '.संगठन',  # India (Organization)
    'xn--nqv7f': '.فلسطين',  # Palestine
    'xn--qqh11a': '.مصر',  # Egypt (alternative)
    'xn--c1avg': '.бел',  # Belarus (alternative)
    'xn--e1a4c': '.ею',  # European Union
    'xn--8h0a': '.ايران',  # Iran
    'xn--1qqw23a': '.游戏',  # China (Game)
    'xn--3bst00m': '.公司',  # China (Company)
    'xn--45br5cyl': '.бг',  # Bulgaria (alternative)
    'xn--s9brj9c': '.срб',  # Serbia (alternative)
    'xn--czrs0t': '.कोम',  # India (COM alternative)
    'xn--czr694b': '.कॉम',  # India (COM alternative)
    'xn--gecrj9c': '.克罗地亚',  # Croatia
    'xn--9krt00a': '.日本',  # Japan
    'xn--xkc2dl3a5ee0h': '.ಭಾರತ',  # India (Kannada)
    'xn--fzys8d69uvgm': '.تونس',  # Tunisia
    'xn--fzc2c9e2c': '.السعودية',  # Saudi Arabia (alternative)
}


def punycode_to_unicode(punycode: str) -> str:
    """Convert a Punycode label to its Unicode representation.

    Args:
        punycode: A domain label such as ``'xn--p1ai'``.

    Returns:
        The input unchanged when it does not start with ``'xn--'``; otherwise
        the result of ``idna.decode``.  If decoding fails, the entry from
        ``IDN_MAPPINGS`` is returned (note those values include a leading
        dot), or the original label when the table has no entry for it.
    """
    if not punycode.startswith('xn--'):
        return punycode
    try:
        return idna.decode(punycode)
    except (idna.IDNAError, UnicodeError):
        # Only decoding failures fall back to the table; anything else
        # (KeyboardInterrupt, programming errors) is allowed to propagate.
        return IDN_MAPPINGS.get(punycode, punycode)


def get_all_idn_tlds() -> list:
    """Download the IANA TLD list and return every IDN TLD it contains.

    Returns:
        A list of dicts, one per ``xn--`` TLD in file order, with the keys
        ``'punycode'`` (lower-cased label), ``'unicode'`` (decoded form) and
        ``'display'`` (``"<punycode> (<unicode>)"``).

    Raises:
        requests.RequestException: If the list cannot be fetched, the request
            times out, or the server answers with an HTTP error status.
    """
    # Imported lazily so punycode_to_unicode stays usable without requests.
    import requests

    response = requests.get(
        _IANA_TLD_LIST_URL, timeout=_REQUEST_TIMEOUT_SECONDS
    )
    # Fail loudly instead of parsing an error page as if it were TLD data.
    response.raise_for_status()

    stripped = (raw.strip() for raw in response.text.splitlines())
    # Skip blank lines and '#' comment lines.
    tlds = [
        entry.lower()
        for entry in stripped
        if entry and not entry.startswith('#')
    ]

    idn_tlds = []
    for tld in tlds:
        if not tld.startswith('xn--'):
            continue
        unicode_form = punycode_to_unicode(tld)
        idn_tlds.append({
            'punycode': tld,
            'unicode': unicode_form,
            'display': f"{tld} ({unicode_form})",
        })
    return idn_tlds


def main() -> None:
    """Print the number of IDN TLDs found and the first 20 of them."""
    idn_tlds = get_all_idn_tlds()
    print(f"Found {len(idn_tlds)} IDN TLDs:")
    for tld in idn_tlds[:20]:
        print(f" {tld['display']}")


if __name__ == "__main__":
    main()