"""Country-name normalisation tables and lookup helpers.

Each canonical country name (a Chinese display name) is paired with a list
of accepted aliases: English names, ISO-style two/three-letter codes and
common alternative spellings.  ``normalize_country`` maps free-form input to
the canonical name; ``get_country_search_variants`` returns every known
spelling for a given input.
"""

import re
from typing import Any, Optional

# (canonical name, [aliases]) pairs.  Order is preserved in COUNTRY_OPTIONS.
COUNTRY_ENTRIES = [
    ("阿富汗", ["Afghanistan", "AF", "AFG"]),
    ("阿尔巴尼亚", ["Albania", "AL", "ALB"]),
    ("阿尔及利亚", ["Algeria", "DZ", "DZA"]),
    ("安道尔", ["Andorra", "AD", "AND"]),
    ("安哥拉", ["Angola", "AO", "AGO"]),
    ("安提瓜和巴布达", ["Antigua and Barbuda", "AG", "ATG"]),
    ("阿根廷", ["Argentina", "AR", "ARG"]),
    ("亚美尼亚", ["Armenia", "AM", "ARM"]),
    ("澳大利亚", ["Australia", "AU", "AUS"]),
    ("奥地利", ["Austria", "AT", "AUT"]),
    ("阿塞拜疆", ["Azerbaijan", "AZ", "AZE"]),
    ("巴哈马", ["Bahamas", "BS", "BHS"]),
    ("巴林", ["Bahrain", "BH", "BHR"]),
    ("孟加拉国", ["Bangladesh", "BD", "BGD"]),
    ("巴巴多斯", ["Barbados", "BB", "BRB"]),
    ("白俄罗斯", ["Belarus", "BY", "BLR"]),
    ("比利时", ["Belgium", "BE", "BEL"]),
    ("伯利兹", ["Belize", "BZ", "BLZ"]),
    ("贝宁", ["Benin", "BJ", "BEN"]),
    ("不丹", ["Bhutan", "BT", "BTN"]),
    ("玻利维亚", ["Bolivia", "BO", "BOL", "Bolivia (Plurinational State of)"]),
    ("波斯尼亚和黑塞哥维那", ["Bosnia and Herzegovina", "BA", "BIH"]),
    ("博茨瓦纳", ["Botswana", "BW", "BWA"]),
    ("巴西", ["Brazil", "BR", "BRA"]),
    ("文莱", ["Brunei", "BN", "BRN", "Brunei Darussalam"]),
    ("保加利亚", ["Bulgaria", "BG", "BGR"]),
    ("布基纳法索", ["Burkina Faso", "BF", "BFA"]),
    ("布隆迪", ["Burundi", "BI", "BDI"]),
    ("柬埔寨", ["Cambodia", "KH", "KHM"]),
    ("喀麦隆", ["Cameroon", "CM", "CMR"]),
    ("加拿大", ["Canada", "CA", "CAN"]),
    ("佛得角", ["Cape Verde", "CV", "CPV", "Cabo Verde"]),
    ("中非", ["Central African Republic", "CF", "CAF"]),
    ("乍得", ["Chad", "TD", "TCD"]),
    ("智利", ["Chile", "CL", "CHL"]),
    ("中国", ["China", "CN", "CHN", "Mainland China", "PRC", "People's Republic of China"]),
    ("中国(香港)", ["Hong Kong", "HK", "HKG", "Hong Kong SAR", "China Hong Kong", "Hong Kong, China"]),
    ("中国(澳门)", ["Macao", "Macau", "MO", "MAC", "Macao SAR", "China Macao", "Macau, China"]),
    ("中国(台湾)", ["Taiwan", "TW", "TWN", "Chinese Taipei", "Taiwan, China"]),
    ("哥伦比亚", ["Colombia", "CO", "COL"]),
    ("科摩罗", ["Comoros", "KM", "COM"]),
    ("刚果(布)", ["Republic of the Congo", "Congo", "Congo-Brazzaville", "CG", "COG"]),
    ("刚果(金)", ["Democratic Republic of the Congo", "DR Congo", "Congo-Kinshasa", "CD", "COD"]),
    ("哥斯达黎加", ["Costa Rica", "CR", "CRI"]),
    ("科特迪瓦", ["Cote d'Ivoire", "Côte d'Ivoire", "Ivory Coast", "CI", "CIV"]),
    ("克罗地亚", ["Croatia", "HR", "HRV"]),
    ("古巴", ["Cuba", "CU", "CUB"]),
    ("塞浦路斯", ["Cyprus", "CY", "CYP"]),
    ("捷克", ["Czech Republic", "Czechia", "CZ", "CZE"]),
    ("丹麦", ["Denmark", "DK", "DNK"]),
    ("吉布提", ["Djibouti", "DJ", "DJI"]),
    ("多米尼克", ["Dominica", "DM", "DMA"]),
    ("多米尼加", ["Dominican Republic", "DO", "DOM"]),
    ("厄瓜多尔", ["Ecuador", "EC", "ECU"]),
    ("埃及", ["Egypt", "EG", "EGY"]),
    ("萨尔瓦多", ["El Salvador", "SV", "SLV"]),
    ("赤道几内亚", ["Equatorial Guinea", "GQ", "GNQ"]),
    ("厄立特里亚", ["Eritrea", "ER", "ERI"]),
    ("爱沙尼亚", ["Estonia", "EE", "EST"]),
    ("埃斯瓦蒂尼", ["Eswatini", "SZ", "SWZ", "Swaziland"]),
    ("埃塞俄比亚", ["Ethiopia", "ET", "ETH"]),
    ("斐济", ["Fiji", "FJ", "FJI"]),
    ("芬兰", ["Finland", "FI", "FIN"]),
    ("法国", ["France", "FR", "FRA"]),
    ("加蓬", ["Gabon", "GA", "GAB"]),
    ("冈比亚", ["Gambia", "GM", "GMB"]),
    ("格鲁吉亚", ["Georgia", "GE", "GEO"]),
    ("德国", ["Germany", "DE", "DEU"]),
    ("加纳", ["Ghana", "GH", "GHA"]),
    ("希腊", ["Greece", "GR", "GRC"]),
    ("格林纳达", ["Grenada", "GD", "GRD"]),
    ("危地马拉", ["Guatemala", "GT", "GTM"]),
    ("几内亚", ["Guinea", "GN", "GIN"]),
    ("几内亚比绍", ["Guinea-Bissau", "GW", "GNB"]),
    ("圭亚那", ["Guyana", "GY", "GUY"]),
    ("海地", ["Haiti", "HT", "HTI"]),
    ("洪都拉斯", ["Honduras", "HN", "HND"]),
    ("匈牙利", ["Hungary", "HU", "HUN"]),
    ("冰岛", ["Iceland", "IS", "ISL"]),
    ("印度", ["India", "IN", "IND"]),
    ("印度尼西亚", ["Indonesia", "ID", "IDN"]),
    ("伊朗", ["Iran", "IR", "IRN", "Iran (Islamic Republic of)"]),
    ("伊拉克", ["Iraq", "IQ", "IRQ"]),
    ("爱尔兰", ["Ireland", "IE", "IRL"]),
    ("以色列", ["Israel", "IL", "ISR"]),
    ("意大利", ["Italy", "IT", "ITA"]),
    ("牙买加", ["Jamaica", "JM", "JAM"]),
    ("日本", ["Japan", "JP", "JPN"]),
    ("约旦", ["Jordan", "JO", "JOR"]),
    ("哈萨克斯坦", ["Kazakhstan", "KZ", "KAZ"]),
    ("肯尼亚", ["Kenya", "KE", "KEN"]),
    ("基里巴斯", ["Kiribati", "KI", "KIR"]),
    ("朝鲜", ["North Korea", "Korea, DPRK", "Democratic People's Republic of Korea", "KP", "PRK"]),
    ("韩国", ["South Korea", "Republic of Korea", "Korea", "KR", "KOR"]),
    ("科威特", ["Kuwait", "KW", "KWT"]),
    ("吉尔吉斯斯坦", ["Kyrgyzstan", "KG", "KGZ"]),
    ("老挝", ["Laos", "Lao PDR", "Lao People's Democratic Republic", "LA", "LAO"]),
    ("拉脱维亚", ["Latvia", "LV", "LVA"]),
    ("黎巴嫩", ["Lebanon", "LB", "LBN"]),
    ("莱索托", ["Lesotho", "LS", "LSO"]),
    ("利比里亚", ["Liberia", "LR", "LBR"]),
    ("利比亚", ["Libya", "LY", "LBY"]),
    ("列支敦士登", ["Liechtenstein", "LI", "LIE"]),
    ("立陶宛", ["Lithuania", "LT", "LTU"]),
    ("卢森堡", ["Luxembourg", "LU", "LUX"]),
    ("马达加斯加", ["Madagascar", "MG", "MDG"]),
    ("马拉维", ["Malawi", "MW", "MWI"]),
    ("马来西亚", ["Malaysia", "MY", "MYS"]),
    ("马尔代夫", ["Maldives", "MV", "MDV"]),
    ("马里", ["Mali", "ML", "MLI"]),
    ("马耳他", ["Malta", "MT", "MLT"]),
    ("马绍尔群岛", ["Marshall Islands", "MH", "MHL"]),
    ("毛里塔尼亚", ["Mauritania", "MR", "MRT"]),
    ("毛里求斯", ["Mauritius", "MU", "MUS"]),
    ("墨西哥", ["Mexico", "MX", "MEX"]),
    ("密克罗尼西亚", ["Micronesia", "FM", "FSM", "Federated States of Micronesia"]),
    ("摩尔多瓦", ["Moldova", "MD", "MDA", "Republic of Moldova"]),
    ("摩纳哥", ["Monaco", "MC", "MCO"]),
    ("蒙古", ["Mongolia", "MN", "MNG"]),
    ("黑山", ["Montenegro", "ME", "MNE"]),
    ("摩洛哥", ["Morocco", "MA", "MAR"]),
    ("莫桑比克", ["Mozambique", "MZ", "MOZ"]),
    ("缅甸", ["Myanmar", "MM", "MMR", "Burma"]),
    ("纳米比亚", ["Namibia", "NA", "NAM"]),
    ("瑙鲁", ["Nauru", "NR", "NRU"]),
    ("尼泊尔", ["Nepal", "NP", "NPL"]),
    ("荷兰", ["Netherlands", "NL", "NLD"]),
    ("新西兰", ["New Zealand", "NZ", "NZL"]),
    ("尼加拉瓜", ["Nicaragua", "NI", "NIC"]),
    ("尼日尔", ["Niger", "NE", "NER"]),
    ("尼日利亚", ["Nigeria", "NG", "NGA"]),
    ("北马其顿", ["North Macedonia", "MK", "MKD", "Macedonia"]),
    ("挪威", ["Norway", "NO", "NOR"]),
    ("阿曼", ["Oman", "OM", "OMN"]),
    ("巴基斯坦", ["Pakistan", "PK", "PAK"]),
    ("帕劳", ["Palau", "PW", "PLW"]),
    ("巴勒斯坦", ["Palestine", "PS", "PSE", "State of Palestine"]),
    ("巴拿马", ["Panama", "PA", "PAN"]),
    ("巴布亚新几内亚", ["Papua New Guinea", "PG", "PNG"]),
    ("巴拉圭", ["Paraguay", "PY", "PRY"]),
    ("秘鲁", ["Peru", "PE", "PER"]),
    ("菲律宾", ["Philippines", "PH", "PHL"]),
    ("波兰", ["Poland", "PL", "POL"]),
    ("葡萄牙", ["Portugal", "PT", "PRT"]),
    ("卡塔尔", ["Qatar", "QA", "QAT"]),
    ("罗马尼亚", ["Romania", "RO", "ROU"]),
    ("俄罗斯", ["Russia", "Russian Federation", "RU", "RUS"]),
    ("卢旺达", ["Rwanda", "RW", "RWA"]),
    ("圣基茨和尼维斯", ["Saint Kitts and Nevis", "KN", "KNA"]),
    ("圣卢西亚", ["Saint Lucia", "LC", "LCA"]),
    ("圣文森特和格林纳丁斯", ["Saint Vincent and the Grenadines", "VC", "VCT"]),
    ("萨摩亚", ["Samoa", "WS", "WSM"]),
    ("圣马力诺", ["San Marino", "SM", "SMR"]),
    ("圣多美和普林西比", ["Sao Tome and Principe", "ST", "STP", "São Tomé and Príncipe"]),
    ("沙特阿拉伯", ["Saudi Arabia", "SA", "SAU"]),
    ("塞内加尔", ["Senegal", "SN", "SEN"]),
    ("塞尔维亚", ["Serbia", "RS", "SRB", "Kosovo", "XK", "XKS", "Republic of Kosovo"]),
    ("塞舌尔", ["Seychelles", "SC", "SYC"]),
    ("塞拉利昂", ["Sierra Leone", "SL", "SLE"]),
    ("新加坡", ["Singapore", "SG", "SGP"]),
    ("斯洛伐克", ["Slovakia", "SK", "SVK"]),
    ("斯洛文尼亚", ["Slovenia", "SI", "SVN"]),
    ("所罗门群岛", ["Solomon Islands", "SB", "SLB"]),
    ("索马里", ["Somalia", "SO", "SOM"]),
    ("南非", ["South Africa", "ZA", "ZAF"]),
    ("南苏丹", ["South Sudan", "SS", "SSD"]),
    ("西班牙", ["Spain", "ES", "ESP"]),
    ("斯里兰卡", ["Sri Lanka", "LK", "LKA"]),
    ("苏丹", ["Sudan", "SD", "SDN"]),
    ("苏里南", ["Suriname", "SR", "SUR"]),
    ("瑞典", ["Sweden", "SE", "SWE"]),
    ("瑞士", ["Switzerland", "CH", "CHE"]),
    ("叙利亚", ["Syria", "SY", "SYR", "Syrian Arab Republic"]),
    ("塔吉克斯坦", ["Tajikistan", "TJ", "TJK"]),
    ("坦桑尼亚", ["Tanzania", "TZ", "TZA", "United Republic of Tanzania"]),
    ("泰国", ["Thailand", "TH", "THA"]),
    ("东帝汶", ["Timor-Leste", "East Timor", "TL", "TLS"]),
    ("多哥", ["Togo", "TG", "TGO"]),
    ("汤加", ["Tonga", "TO", "TON"]),
    ("特立尼达和多巴哥", ["Trinidad and Tobago", "TT", "TTO"]),
    ("突尼斯", ["Tunisia", "TN", "TUN"]),
    ("土耳其", ["Turkey", "TR", "TUR", "Türkiye"]),
    ("土库曼斯坦", ["Turkmenistan", "TM", "TKM"]),
    ("图瓦卢", ["Tuvalu", "TV", "TUV"]),
    ("乌干达", ["Uganda", "UG", "UGA"]),
    ("乌克兰", ["Ukraine", "UA", "UKR"]),
    ("阿联酋", ["United Arab Emirates", "AE", "ARE", "UAE"]),
    ("英国", ["United Kingdom", "UK", "GB", "GBR", "Great Britain", "Britain", "England"]),
    ("美国", ["United States", "United States of America", "US", "USA", "U.S.", "U.S.A."]),
    ("乌拉圭", ["Uruguay", "UY", "URY"]),
    ("乌兹别克斯坦", ["Uzbekistan", "UZ", "UZB"]),
    ("瓦努阿图", ["Vanuatu", "VU", "VUT"]),
    ("梵蒂冈", ["Vatican City", "Holy See", "VA", "VAT"]),
    ("委内瑞拉", ["Venezuela", "VE", "VEN", "Venezuela (Bolivarian Republic of)"]),
    ("越南", ["Vietnam", "Viet Nam", "VN", "VNM"]),
    ("也门", ["Yemen", "YE", "YEM"]),
    ("赞比亚", ["Zambia", "ZM", "ZMB"]),
    ("津巴布韦", ["Zimbabwe", "ZW", "ZWE"]),
]

# Canonical names in declaration order, plus a set for O(1) membership tests.
COUNTRY_OPTIONS = [entry[0] for entry in COUNTRY_ENTRIES]
CANONICAL_COUNTRY_SET = set(COUNTRY_OPTIONS)

# Case-folded placeholder values that never denote a country.
# NOTE: "na" is listed here, so the input "NA" is rejected before the alias
# lookup even though "NA" appears as an alias of 纳米比亚 in COUNTRY_ENTRIES.
INVALID_COUNTRY_VALUES = {
    "",
    "-",
    "--",
    "unknown",
    "n/a",
    "na",
    "none",
    "null",
    "global",
    "world",
    "worldwide",
    "xx",
}

# Inputs made only of digits, whitespace and numeric punctuation.
NUMERIC_LIKE_PATTERN = re.compile(r"^[\d\s,._%+\-]+$")

# Fullwidth parentheses (U+FF08 / U+FF09) -> ASCII parentheses.  Written with
# escapes so the mapping cannot silently degrade into a no-op if the source
# text is ever width-normalised.
_FULLWIDTH_PAREN_TABLE = str.maketrans({"\uff08": "(", "\uff09": ")"})

# case-folded canonical name or alias -> canonical name
COUNTRY_ALIAS_MAP = {}
# canonical name -> [canonical name, *aliases]
COUNTRY_VARIANTS_MAP = {}
for canonical, aliases in COUNTRY_ENTRIES:
    COUNTRY_ALIAS_MAP[canonical.casefold()] = canonical
    variants = [canonical, *aliases]
    COUNTRY_VARIANTS_MAP[canonical] = variants
    for alias in aliases:
        COUNTRY_ALIAS_MAP[alias.casefold()] = canonical


def normalize_country(value: Any) -> Optional[str]:
    """Map a free-form country value to its canonical name.

    The input is stripped, runs of whitespace are collapsed to one space and
    fullwidth parentheses are folded to ASCII ones, so that e.g. a name typed
    with fullwidth brackets still matches the ASCII-bracketed canonical
    entries.  Matching against aliases is case-insensitive (``casefold``).

    Args:
        value: Candidate value; anything that is not a ``str`` is rejected.

    Returns:
        The canonical country name, or ``None`` when the value is not a
        string, is empty, is a known placeholder (see
        ``INVALID_COUNTRY_VALUES``), looks numeric, or is not a known name
        or alias.
    """
    if value is None:
        return None
    if not isinstance(value, str):
        return None

    normalized = re.sub(r"\s+", " ", value.strip())
    # The canonical names use ASCII parentheses; accept the fullwidth forms too.
    normalized = normalized.translate(_FULLWIDTH_PAREN_TABLE)
    if not normalized:
        return None

    lowered = normalized.casefold()
    if lowered in INVALID_COUNTRY_VALUES:
        return None
    if NUMERIC_LIKE_PATTERN.fullmatch(normalized):
        return None
    if normalized in CANONICAL_COUNTRY_SET:
        return normalized
    return COUNTRY_ALIAS_MAP.get(lowered)


def get_country_search_variants(value: Any) -> list[str]:
    """Return every known spelling of the country that *value* refers to.

    Args:
        value: Any input accepted by ``normalize_country``.

    Returns:
        The canonical name followed by its aliases, in declaration order,
        whitespace-normalised and de-duplicated case-insensitively.  An empty
        list when *value* cannot be resolved to a country.
    """
    canonical = normalize_country(value)
    if canonical is None:
        return []

    variants = []
    seen = set()
    for item in COUNTRY_VARIANTS_MAP.get(canonical, [canonical]):
        if not isinstance(item, str):
            continue
        normalized = re.sub(r"\s+", " ", item.strip())
        if not normalized:
            continue
        key = normalized.casefold()
        if key in seen:
            continue
        seen.add(key)
        variants.append(normalized)
    return variants