Files
planet/backend/app/core/countries.py
2026-03-25 17:19:10 +08:00

281 lines
12 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
import re
from typing import Any, Optional
# Master country table. Each entry is a pair:
#   (canonical name in Chinese, [alternative spellings])
# The alternative spellings hold the English name(s), two- and three-letter
# country codes, and other common variants. The lookup tables built further
# down in this module are derived from this list, and the order here is the
# order exposed through COUNTRY_OPTIONS.
COUNTRY_ENTRIES = [
    ("阿富汗", ["Afghanistan", "AF", "AFG"]),
    ("阿尔巴尼亚", ["Albania", "AL", "ALB"]),
    ("阿尔及利亚", ["Algeria", "DZ", "DZA"]),
    ("安道尔", ["Andorra", "AD", "AND"]),
    ("安哥拉", ["Angola", "AO", "AGO"]),
    ("安提瓜和巴布达", ["Antigua and Barbuda", "AG", "ATG"]),
    ("阿根廷", ["Argentina", "AR", "ARG"]),
    ("亚美尼亚", ["Armenia", "AM", "ARM"]),
    ("澳大利亚", ["Australia", "AU", "AUS"]),
    ("奥地利", ["Austria", "AT", "AUT"]),
    ("阿塞拜疆", ["Azerbaijan", "AZ", "AZE"]),
    ("巴哈马", ["Bahamas", "BS", "BHS"]),
    ("巴林", ["Bahrain", "BH", "BHR"]),
    ("孟加拉国", ["Bangladesh", "BD", "BGD"]),
    ("巴巴多斯", ["Barbados", "BB", "BRB"]),
    ("白俄罗斯", ["Belarus", "BY", "BLR"]),
    ("比利时", ["Belgium", "BE", "BEL"]),
    ("伯利兹", ["Belize", "BZ", "BLZ"]),
    ("贝宁", ["Benin", "BJ", "BEN"]),
    ("不丹", ["Bhutan", "BT", "BTN"]),
    ("玻利维亚", ["Bolivia", "BO", "BOL", "Bolivia (Plurinational State of)"]),
    ("波斯尼亚和黑塞哥维那", ["Bosnia and Herzegovina", "BA", "BIH"]),
    ("博茨瓦纳", ["Botswana", "BW", "BWA"]),
    ("巴西", ["Brazil", "BR", "BRA"]),
    ("文莱", ["Brunei", "BN", "BRN", "Brunei Darussalam"]),
    ("保加利亚", ["Bulgaria", "BG", "BGR"]),
    ("布基纳法索", ["Burkina Faso", "BF", "BFA"]),
    ("布隆迪", ["Burundi", "BI", "BDI"]),
    ("柬埔寨", ["Cambodia", "KH", "KHM"]),
    ("喀麦隆", ["Cameroon", "CM", "CMR"]),
    ("加拿大", ["Canada", "CA", "CAN"]),
    ("佛得角", ["Cape Verde", "CV", "CPV", "Cabo Verde"]),
    ("中非", ["Central African Republic", "CF", "CAF"]),
    ("乍得", ["Chad", "TD", "TCD"]),
    ("智利", ["Chile", "CL", "CHL"]),
    ("中国", ["China", "CN", "CHN", "Mainland China", "PRC", "People's Republic of China"]),
    # Hong Kong, Macao and Taiwan are separate entries whose canonical names
    # contain parentheses.
    ("中国(香港)", ["Hong Kong", "HK", "HKG", "Hong Kong SAR", "China Hong Kong", "Hong Kong, China"]),
    ("中国(澳门)", ["Macao", "Macau", "MO", "MAC", "Macao SAR", "China Macao", "Macau, China"]),
    ("中国(台湾)", ["Taiwan", "TW", "TWN", "Chinese Taipei", "Taiwan, China"]),
    ("哥伦比亚", ["Colombia", "CO", "COL"]),
    ("科摩罗", ["Comoros", "KM", "COM"]),
    # The bare alias "Congo" belongs to the Republic of the Congo entry.
    ("刚果(布)", ["Republic of the Congo", "Congo", "Congo-Brazzaville", "CG", "COG"]),
    ("刚果(金)", ["Democratic Republic of the Congo", "DR Congo", "Congo-Kinshasa", "CD", "COD"]),
    ("哥斯达黎加", ["Costa Rica", "CR", "CRI"]),
    ("科特迪瓦", ["Cote d'Ivoire", "Côte d'Ivoire", "Ivory Coast", "CI", "CIV"]),
    ("克罗地亚", ["Croatia", "HR", "HRV"]),
    ("古巴", ["Cuba", "CU", "CUB"]),
    ("塞浦路斯", ["Cyprus", "CY", "CYP"]),
    ("捷克", ["Czech Republic", "Czechia", "CZ", "CZE"]),
    ("丹麦", ["Denmark", "DK", "DNK"]),
    ("吉布提", ["Djibouti", "DJ", "DJI"]),
    ("多米尼克", ["Dominica", "DM", "DMA"]),
    ("多米尼加", ["Dominican Republic", "DO", "DOM"]),
    ("厄瓜多尔", ["Ecuador", "EC", "ECU"]),
    ("埃及", ["Egypt", "EG", "EGY"]),
    ("萨尔瓦多", ["El Salvador", "SV", "SLV"]),
    ("赤道几内亚", ["Equatorial Guinea", "GQ", "GNQ"]),
    ("厄立特里亚", ["Eritrea", "ER", "ERI"]),
    ("爱沙尼亚", ["Estonia", "EE", "EST"]),
    ("埃斯瓦蒂尼", ["Eswatini", "SZ", "SWZ", "Swaziland"]),
    ("埃塞俄比亚", ["Ethiopia", "ET", "ETH"]),
    ("斐济", ["Fiji", "FJ", "FJI"]),
    ("芬兰", ["Finland", "FI", "FIN"]),
    ("法国", ["France", "FR", "FRA"]),
    ("加蓬", ["Gabon", "GA", "GAB"]),
    ("冈比亚", ["Gambia", "GM", "GMB"]),
    ("格鲁吉亚", ["Georgia", "GE", "GEO"]),
    ("德国", ["Germany", "DE", "DEU"]),
    ("加纳", ["Ghana", "GH", "GHA"]),
    ("希腊", ["Greece", "GR", "GRC"]),
    ("格林纳达", ["Grenada", "GD", "GRD"]),
    ("危地马拉", ["Guatemala", "GT", "GTM"]),
    ("几内亚", ["Guinea", "GN", "GIN"]),
    ("几内亚比绍", ["Guinea-Bissau", "GW", "GNB"]),
    ("圭亚那", ["Guyana", "GY", "GUY"]),
    ("海地", ["Haiti", "HT", "HTI"]),
    ("洪都拉斯", ["Honduras", "HN", "HND"]),
    ("匈牙利", ["Hungary", "HU", "HUN"]),
    ("冰岛", ["Iceland", "IS", "ISL"]),
    ("印度", ["India", "IN", "IND"]),
    ("印度尼西亚", ["Indonesia", "ID", "IDN"]),
    ("伊朗", ["Iran", "IR", "IRN", "Iran (Islamic Republic of)"]),
    ("伊拉克", ["Iraq", "IQ", "IRQ"]),
    ("爱尔兰", ["Ireland", "IE", "IRL"]),
    ("以色列", ["Israel", "IL", "ISR"]),
    ("意大利", ["Italy", "IT", "ITA"]),
    ("牙买加", ["Jamaica", "JM", "JAM"]),
    ("日本", ["Japan", "JP", "JPN"]),
    ("约旦", ["Jordan", "JO", "JOR"]),
    ("哈萨克斯坦", ["Kazakhstan", "KZ", "KAZ"]),
    ("肯尼亚", ["Kenya", "KE", "KEN"]),
    ("基里巴斯", ["Kiribati", "KI", "KIR"]),
    ("朝鲜", ["North Korea", "Korea, DPRK", "Democratic People's Republic of Korea", "KP", "PRK"]),
    # The bare alias "Korea" belongs to the South Korea entry.
    ("韩国", ["South Korea", "Republic of Korea", "Korea", "KR", "KOR"]),
    ("科威特", ["Kuwait", "KW", "KWT"]),
    ("吉尔吉斯斯坦", ["Kyrgyzstan", "KG", "KGZ"]),
    ("老挝", ["Laos", "Lao PDR", "Lao People's Democratic Republic", "LA", "LAO"]),
    ("拉脱维亚", ["Latvia", "LV", "LVA"]),
    ("黎巴嫩", ["Lebanon", "LB", "LBN"]),
    ("莱索托", ["Lesotho", "LS", "LSO"]),
    ("利比里亚", ["Liberia", "LR", "LBR"]),
    ("利比亚", ["Libya", "LY", "LBY"]),
    ("列支敦士登", ["Liechtenstein", "LI", "LIE"]),
    ("立陶宛", ["Lithuania", "LT", "LTU"]),
    ("卢森堡", ["Luxembourg", "LU", "LUX"]),
    ("马达加斯加", ["Madagascar", "MG", "MDG"]),
    ("马拉维", ["Malawi", "MW", "MWI"]),
    ("马来西亚", ["Malaysia", "MY", "MYS"]),
    ("马尔代夫", ["Maldives", "MV", "MDV"]),
    ("马里", ["Mali", "ML", "MLI"]),
    ("马耳他", ["Malta", "MT", "MLT"]),
    ("马绍尔群岛", ["Marshall Islands", "MH", "MHL"]),
    ("毛里塔尼亚", ["Mauritania", "MR", "MRT"]),
    ("毛里求斯", ["Mauritius", "MU", "MUS"]),
    ("墨西哥", ["Mexico", "MX", "MEX"]),
    ("密克罗尼西亚", ["Micronesia", "FM", "FSM", "Federated States of Micronesia"]),
    ("摩尔多瓦", ["Moldova", "MD", "MDA", "Republic of Moldova"]),
    ("摩纳哥", ["Monaco", "MC", "MCO"]),
    ("蒙古", ["Mongolia", "MN", "MNG"]),
    ("黑山", ["Montenegro", "ME", "MNE"]),
    ("摩洛哥", ["Morocco", "MA", "MAR"]),
    ("莫桑比克", ["Mozambique", "MZ", "MOZ"]),
    ("缅甸", ["Myanmar", "MM", "MMR", "Burma"]),
    ("纳米比亚", ["Namibia", "NA", "NAM"]),
    ("瑙鲁", ["Nauru", "NR", "NRU"]),
    ("尼泊尔", ["Nepal", "NP", "NPL"]),
    ("荷兰", ["Netherlands", "NL", "NLD"]),
    ("新西兰", ["New Zealand", "NZ", "NZL"]),
    ("尼加拉瓜", ["Nicaragua", "NI", "NIC"]),
    ("尼日尔", ["Niger", "NE", "NER"]),
    ("尼日利亚", ["Nigeria", "NG", "NGA"]),
    ("北马其顿", ["North Macedonia", "MK", "MKD", "Macedonia"]),
    ("挪威", ["Norway", "NO", "NOR"]),
    ("阿曼", ["Oman", "OM", "OMN"]),
    ("巴基斯坦", ["Pakistan", "PK", "PAK"]),
    ("帕劳", ["Palau", "PW", "PLW"]),
    ("巴勒斯坦", ["Palestine", "PS", "PSE", "State of Palestine"]),
    ("巴拿马", ["Panama", "PA", "PAN"]),
    ("巴布亚新几内亚", ["Papua New Guinea", "PG", "PNG"]),
    ("巴拉圭", ["Paraguay", "PY", "PRY"]),
    ("秘鲁", ["Peru", "PE", "PER"]),
    ("菲律宾", ["Philippines", "PH", "PHL"]),
    ("波兰", ["Poland", "PL", "POL"]),
    ("葡萄牙", ["Portugal", "PT", "PRT"]),
    ("卡塔尔", ["Qatar", "QA", "QAT"]),
    ("罗马尼亚", ["Romania", "RO", "ROU"]),
    ("俄罗斯", ["Russia", "Russian Federation", "RU", "RUS"]),
    ("卢旺达", ["Rwanda", "RW", "RWA"]),
    ("圣基茨和尼维斯", ["Saint Kitts and Nevis", "KN", "KNA"]),
    ("圣卢西亚", ["Saint Lucia", "LC", "LCA"]),
    ("圣文森特和格林纳丁斯", ["Saint Vincent and the Grenadines", "VC", "VCT"]),
    ("萨摩亚", ["Samoa", "WS", "WSM"]),
    ("圣马力诺", ["San Marino", "SM", "SMR"]),
    ("圣多美和普林西比", ["Sao Tome and Principe", "ST", "STP", "São Tomé and Príncipe"]),
    ("沙特阿拉伯", ["Saudi Arabia", "SA", "SAU"]),
    ("塞内加尔", ["Senegal", "SN", "SEN"]),
    # Kosovo names and codes are listed as aliases of the Serbia entry.
    ("塞尔维亚", ["Serbia", "RS", "SRB", "Kosovo", "XK", "XKS", "Republic of Kosovo"]),
    ("塞舌尔", ["Seychelles", "SC", "SYC"]),
    ("塞拉利昂", ["Sierra Leone", "SL", "SLE"]),
    ("新加坡", ["Singapore", "SG", "SGP"]),
    ("斯洛伐克", ["Slovakia", "SK", "SVK"]),
    ("斯洛文尼亚", ["Slovenia", "SI", "SVN"]),
    ("所罗门群岛", ["Solomon Islands", "SB", "SLB"]),
    ("索马里", ["Somalia", "SO", "SOM"]),
    ("南非", ["South Africa", "ZA", "ZAF"]),
    ("南苏丹", ["South Sudan", "SS", "SSD"]),
    ("西班牙", ["Spain", "ES", "ESP"]),
    ("斯里兰卡", ["Sri Lanka", "LK", "LKA"]),
    ("苏丹", ["Sudan", "SD", "SDN"]),
    ("苏里南", ["Suriname", "SR", "SUR"]),
    ("瑞典", ["Sweden", "SE", "SWE"]),
    ("瑞士", ["Switzerland", "CH", "CHE"]),
    ("叙利亚", ["Syria", "SY", "SYR", "Syrian Arab Republic"]),
    ("塔吉克斯坦", ["Tajikistan", "TJ", "TJK"]),
    ("坦桑尼亚", ["Tanzania", "TZ", "TZA", "United Republic of Tanzania"]),
    ("泰国", ["Thailand", "TH", "THA"]),
    ("东帝汶", ["Timor-Leste", "East Timor", "TL", "TLS"]),
    ("多哥", ["Togo", "TG", "TGO"]),
    ("汤加", ["Tonga", "TO", "TON"]),
    ("特立尼达和多巴哥", ["Trinidad and Tobago", "TT", "TTO"]),
    ("突尼斯", ["Tunisia", "TN", "TUN"]),
    ("土耳其", ["Turkey", "TR", "TUR", "Türkiye"]),
    ("土库曼斯坦", ["Turkmenistan", "TM", "TKM"]),
    ("图瓦卢", ["Tuvalu", "TV", "TUV"]),
    ("乌干达", ["Uganda", "UG", "UGA"]),
    ("乌克兰", ["Ukraine", "UA", "UKR"]),
    ("阿联酋", ["United Arab Emirates", "AE", "ARE", "UAE"]),
    ("英国", ["United Kingdom", "UK", "GB", "GBR", "Great Britain", "Britain", "England"]),
    ("美国", ["United States", "United States of America", "US", "USA", "U.S.", "U.S.A."]),
    ("乌拉圭", ["Uruguay", "UY", "URY"]),
    ("乌兹别克斯坦", ["Uzbekistan", "UZ", "UZB"]),
    ("瓦努阿图", ["Vanuatu", "VU", "VUT"]),
    ("梵蒂冈", ["Vatican City", "Holy See", "VA", "VAT"]),
    ("委内瑞拉", ["Venezuela", "VE", "VEN", "Venezuela (Bolivarian Republic of)"]),
    ("越南", ["Vietnam", "Viet Nam", "VN", "VNM"]),
    ("也门", ["Yemen", "YE", "YEM"]),
    ("赞比亚", ["Zambia", "ZM", "ZMB"]),
    ("津巴布韦", ["Zimbabwe", "ZW", "ZWE"]),
]
# Canonical names only, in the same order as COUNTRY_ENTRIES.
COUNTRY_OPTIONS = [canonical_name for canonical_name, _alias_list in COUNTRY_ENTRIES]
# The same names as a set, for constant-time membership tests.
CANONICAL_COUNTRY_SET = set(COUNTRY_OPTIONS)
# Placeholder strings that carry no country information. normalize_country
# compares the case-folded input against this set before any alias lookup.
INVALID_COUNTRY_VALUES = {
    # Empty value and dash fillers.
    "", "-", "--",
    # Explicit "no value" markers.
    "unknown", "n/a", "na", "none", "null",
    # Scopes wider than a single country.
    "global", "world", "worldwide",
    # Two-letter filler code.
    "xx",
}

# Matches a value made up only of digits, whitespace and the characters
# , . _ % + - (for example "12,345.6%"); such values are not country names.
NUMERIC_LIKE_PATTERN = re.compile(r"^[\d\s,._%+\-]+$")
# Lookup tables derived from COUNTRY_ENTRIES:
#   COUNTRY_ALIAS_MAP     case-folded canonical name or alias -> canonical name
#   COUNTRY_VARIANTS_MAP  canonical name -> [canonical name, *aliases]
COUNTRY_ALIAS_MAP = {}
COUNTRY_VARIANTS_MAP = {}
for canonical, aliases in COUNTRY_ENTRIES:
    variants = [canonical, *aliases]
    COUNTRY_VARIANTS_MAP[canonical] = variants
    # Register the canonical name first, then every alias, each under its
    # case-folded form; a repeated key would be overwritten by the later one.
    COUNTRY_ALIAS_MAP.update((spelling.casefold(), canonical) for spelling in variants)
def normalize_country(value: Any) -> Optional[str]:
    """Resolve a free-form country value to its canonical name.

    The input is trimmed and its whitespace runs are collapsed, then it is
    looked up twice: first exactly as written, and then with ASCII
    parentheses removed. The first lookup lets names that legitimately
    contain parentheses resolve (canonical entries such as "中国(香港)" and
    aliases such as "Bolivia (Plurinational State of)"); the second keeps
    wrapped inputs such as "(China)" working.

    Args:
        value: Candidate country value. Anything that is not a ``str``
            (including ``None``) is rejected.

    Returns:
        The canonical name, or ``None`` when the value is empty, is a
        placeholder listed in ``INVALID_COUNTRY_VALUES``, consists only of
        numeric-looking characters, or matches no known name or alias.

    Note:
        Placeholders are rejected before the alias lookup and that check is
        case-insensitive, so "NA" is treated like "na" and does not resolve
        to Namibia.
    """
    if not isinstance(value, str):
        return None

    collapsed = re.sub(r"\s+", " ", value.strip())

    # Parenthesis-free form. Removing the parentheses can expose leading,
    # trailing or doubled spaces ("( China )"), so tidy the whitespace again.
    bare = collapsed.replace("(", "").replace(")", "")
    bare = re.sub(r"\s+", " ", bare).strip()

    if not bare:
        return None
    if bare.casefold() in INVALID_COUNTRY_VALUES:
        return None
    if NUMERIC_LIKE_PATTERN.fullmatch(bare):
        return None

    # Try the parenthesis-preserving form first: the lookup tables are keyed
    # by the names as written in COUNTRY_ENTRIES, parentheses included, so a
    # stripped-only lookup could never match those entries.
    for candidate in (collapsed, bare):
        if candidate in CANONICAL_COUNTRY_SET:
            return candidate
        resolved = COUNTRY_ALIAS_MAP.get(candidate.casefold())
        if resolved is not None:
            return resolved
    return None
def get_country_search_variants(value: Any) -> list[str]:
    """List every known spelling of the country that *value* refers to.

    Args:
        value: Any input accepted by ``normalize_country``.

    Returns:
        The canonical name followed by its aliases, with whitespace runs
        collapsed and case-insensitive duplicates dropped (the first
        occurrence is kept). An empty list when *value* does not resolve to
        a known country.
    """
    country = normalize_country(value)
    if country is None:
        return []

    # Keyed by the case-folded spelling. Dicts preserve insertion order and
    # setdefault never overwrites, so the first spelling of each variant wins.
    distinct: dict[str, str] = {}
    for spelling in COUNTRY_VARIANTS_MAP.get(country, [country]):
        if isinstance(spelling, str):
            tidy = re.sub(r"\s+", " ", spelling.strip())
            if tidy:
                distinct.setdefault(tidy.casefold(), tidy)
    return list(distinct.values())