def convert_categorical(df):
    """One-hot encode the second column of ``df``, one output row per id.

    The column at position 1 is treated as a nominal string feature. Each
    value is normalised (lower-cased, surrounding whitespace stripped, inner
    spaces replaced by underscores) and every distinct normalised value ``v``
    produces a float indicator column named ``<column>_is_<v>``. The rows are
    then grouped by ``config_db['id_column']`` (a name defined outside this
    function) taking the maximum of each indicator, so an id gets 1.0 for
    every category that appears in at least one of its rows.

    Missing values and blank strings are not categories: those rows are 0.0
    in every indicator column.

    Args:
        df: pandas DataFrame whose second column holds the categorical values
            (it must support the ``.str`` accessor) and which contains the
            column(s) named by ``config_db['id_column']``. It is not modified.

    Returns:
        A tuple ``(encoded, featnames)``: the grouped DataFrame holding the
        id column(s) plus the indicator columns, and the list of indicator
        column names in sorted category order.
    """
    onecol = df.columns[1]

    # Normalise the *values*, not only the category labels, so the equality
    # test below compares like with like. Previously "new york" became the
    # label "new_york" but was compared against the raw "new york" and never
    # matched. A single space is a literal in both regex and non-regex mode,
    # so the replace behaves the same across pandas versions.
    normalized = df[onecol].str.lower().str.strip().str.replace(" ", "_")

    # dropna() removes None and NaN alike, so no spurious "<col>_is_nan"
    # feature is produced; blank strings are excluded as "no category".
    # Sorting makes the feature order reproducible between runs.
    categories = sorted(set(normalized.dropna()) - {""})

    # drop() returns a new frame, so the caller's DataFrame stays untouched.
    encoded = df.drop(onecol, axis=1)
    featnames = []
    for category in categories:
        featname = onecol + "_is_" + category
        featnames.append(featname)
        encoded[featname] = (normalized == category).astype(float)

    # NOTE(review): the previous implementation also built "<col>_is_null",
    # but it was not in featnames and so never reached the output. The
    # returned shape is kept as it was; add it to featnames if a null
    # indicator is actually wanted downstream.
    grouped = encoded.groupby(
        config_db['id_column'], sort=False, as_index=False
    )
    encoded = grouped[featnames].max()
    return encoded, featnames
feature_processor.py 文件源码
python
阅读 33
收藏 0
点赞 0
评论 0
评论列表
文章目录