feature_processor.py 文件源码

python
阅读 33 收藏 0 点赞 0 评论 0

项目:johnson-county-ddj-public 作者: dssg 项目源码 文件源码
def convert_categorical(df, id_column=None):
    """One-hot encode the second column of *df*, one output row per id.

    The column at position 1 is treated as a string-valued categorical.
    Its values are normalised (lower-cased, surrounding whitespace
    stripped, inner spaces replaced by underscores) and one float
    indicator column named ``<column>_is_<category>`` is produced per
    distinct normalised value.  Rows are then grouped by *id_column* and
    each indicator is reduced with ``max``, so an id gets ``1.0`` for a
    category if any of its rows had that category.

    Missing values and blank (empty / whitespace-only) values do not get
    an indicator of their own; such rows are simply ``0.0`` everywhere.

    The input frame is not modified.

    Args:
        df: DataFrame containing *id_column* and, at column position 1,
            the string column to encode.
        id_column: Name of the column to group by.  When ``None`` (the
            default) the module-level ``config_db['id_column']`` is used.

    Returns:
        A ``(frame, featnames)`` tuple: ``frame`` has *id_column* followed
        by the indicator columns, one row per id in order of first
        appearance; ``featnames`` is the alphabetically sorted list of
        indicator column names.

    Raises:
        IndexError: if *df* has fewer than two columns.
        AttributeError: if the second column does not hold strings
            (raised by the pandas ``.str`` accessor).
        KeyError: if *id_column* is not a column of *df*.
    """
    if id_column is None:
        # Looked up lazily so callers that pass id_column explicitly do not
        # need the module-level configuration to be loaded.
        id_column = config_db['id_column']

    onecol_name = df.columns[1]

    # Normalise into a separate Series (leaving the caller's frame intact)
    # and apply exactly the same normalisation that is used for the
    # category labels.  Comparing labels against un-normalised values would
    # make every category containing spaces an all-zero column.
    normalized = (
        df[onecol_name]
        .str.lower()
        .str.strip()
        .str.replace(" ", "_", regex=False)
    )

    # dropna() keeps missing values from turning into a bogus "nan" label;
    # sorting makes the feature order reproducible between runs.
    categories = sorted(set(normalized.dropna()) - {""})
    featnames = [onecol_name + '_is_' + category for category in categories]

    if not featnames:
        # Nothing to encode: return the distinct ids only, mirroring what
        # the groupby below would yield (null ids dropped, first-seen order).
        ids = df[[id_column]].dropna().drop_duplicates().reset_index(drop=True)
        return ids, featnames

    # Build all indicators at once instead of inserting columns one by one.
    indicators = pd.DataFrame(
        {
            name: (normalized == category).astype(float)
            for name, category in zip(featnames, categories)
        }
    )

    # NOTE(review): the previous implementation also computed a
    # "<column>_is_null" indicator, but it was never part of featnames and
    # therefore never reached the output.  It is intentionally not added
    # here so the returned columns stay limited to the category indicators.
    out = pd.concat([df[[id_column]], indicators], axis=1)
    out = out.groupby(id_column, sort=False, as_index=False)[featnames].max()
    return out, featnames
评论列表
文章目录


问题


面经


文章

微信
公众号

扫码关注公众号