def _starts_with_output(data, col):
    '''
    Helper function for to_integers in cases where
    the feature is categorized based on a common
    first character of a string.

    Side effect: missing values in data[col] are replaced with the
    string '0' and the filled column is written back into `data`.

    Each value of data[col] is tested against every prefix listed by
    `_category_starts_with(data, col)`; for each prefix the value starts
    with, the first entry of that prefix's column in the transposed
    reference table is collected (presumably the integer code of the
    category - confirm against `_category_starts_with`).

    Returns a pandas Series of the collected entries (default index).
    If the number of collected entries differs from the number of rows
    (a value matched no prefix, or matched several), an error message is
    printed and None is returned.
    '''
    # Presumably done so that every entry is a string and supports
    # .startswith(); note that this mutates the caller's frame.
    data[col] = data[col].fillna('0')

    temp_df = _category_starts_with(data, col)
    temp_df['start_char'] = temp_df[0]
    temp_df = temp_df.drop(0, axis=1)
    # After the transpose there is one column per prefix.
    reference_df = temp_df.set_index('start_char').transpose()

    # Loop-invariant lookups, read once instead of on every iteration.
    column = data[col]
    prefixes = list(temp_df['start_char'])

    temp_list = []
    # The lookup stays label-based (column[i]) on purpose: the result is a
    # Series with a default 0..n-1 index, so entry i must describe label i.
    for i in range(len(column)):
        value = column[i]
        for prefix in prefixes:
            if value.startswith(prefix):
                temp_list.append(reference_df[prefix][0])

    if len(column) != len(temp_list):
        # Parenthesised form prints identically on Python 2 and Python 3.
        print("AUTONOMIO ERROR: length of input and output do not match")
        return None
    return pd.Series(temp_list)
评论列表
文章目录