def detect_input(cls, values, sample_size=200):
    """
    Return the first "from_*" method that matches at least half of `values`.

    Duplicates and missing entries are dropped from `values`; if more than
    `sample_size` distinct entries remain, a random sample of that size is
    used instead. Candidate methods are then tried in a fixed order. An
    entry counts as matched when the corresponding item of the method's
    result is truthy.

    Parameters
    ----------
    values : pd.Series
        Series to inspect. String values are expected, because the
        ``.str`` accessor is used to measure their lengths.
    sample_size : int
        Maximum number of distinct values that are tested.

    Returns
    -------
    callable or None
        The first ``cls.from_*`` method whose match count is at least half
        of the tested values, or None if there is no such method or there
        is nothing to test.
    """
    assert isinstance(values, pd.Series)
    values = values.drop_duplicates().dropna()
    if len(values) > sample_size:
        # Random sample: results may differ between calls on large inputs.
        values = values.sample(sample_size)
    if values.empty:
        # Without this guard ``0 >= 0 / 2`` holds, so the first candidate
        # that has no length precondition would be reported as a match.
        return None

    strlen = values.str.len().dropna().unique()
    # Length shared by all tested strings, or None when lengths differ.
    common_len = strlen[0] if len(strlen) == 1 else None

    # (method, precondition) pairs; a method is only called when its cheap
    # length-based precondition holds.
    candidates = (
        (cls.from_cc2, common_len == 2),
        (cls.from_cc3, common_len == 3),
        (cls.from_cc_name, True),
        (cls.from_us_state, True),
        (cls.from_city_eu, True),
        (cls.from_city_us, True),
        (cls.from_city_world, True),
        (cls.from_region, True),
        (cls.from_fips, True),
        # Only tried when every string length is 2, 5 or 8.
        (cls.from_hasc, np.isin(strlen, [2, 5, 8]).all()),
    )

    threshold = len(values) / 2
    for method, applicable in candidates:
        if not applicable:
            continue
        if sum(map(bool, method(values))) >= threshold:
            return method
    return None
# (Web-page navigation residue, not part of the code: "Comment list" / "Table of contents".)