def get_basic_product_feat():
    """Return the product table with one-hot encoded attribute columns.

    The result is cached as a CSV file at ``./cache/basic_product.csv``.
    When that file exists it is loaded and returned as-is. Otherwise the raw
    table is read from the module-level ``product_path``, the ``a1``, ``a2``,
    ``a3``, ``brand`` and ``cate`` columns are expanded with
    ``pd.get_dummies``, and the result is written to the cache before being
    returned.

    Returns:
        pandas.DataFrame: ``sku_id`` and the raw ``brand`` column, followed by
        the dummy columns prefixed ``a1_``, ``a2_``, ``a3_``, ``brand_`` and
        ``cate_`` (in that order on the freshly built path).
    """
    dump_path = './cache/basic_product.csv'

    if os.path.exists(dump_path):
        # Cached path: column dtypes are whatever read_csv infers.
        product = pd.read_csv(dump_path)
    else:
        product = pd.read_csv(product_path)

        # One-hot encode each categorical column with its own prefix.
        attr1_df = pd.get_dummies(product["a1"], prefix="a1")
        attr2_df = pd.get_dummies(product["a2"], prefix="a2")
        attr3_df = pd.get_dummies(product["a3"], prefix="a3")
        cate_df = pd.get_dummies(product['cate'], prefix='cate')
        brand_df = pd.get_dummies(product['brand'], prefix='brand')

        # The raw 'brand' column is kept alongside its dummy columns.
        product = pd.concat(
            [product[['sku_id', 'brand']],
             attr1_df, attr2_df, attr3_df, brand_df, cate_df],
            axis=1,
        )

        # to_csv does not create missing parent directories, so make sure
        # the cache directory exists before writing.
        cache_dir = os.path.dirname(dump_path)
        if cache_dir and not os.path.isdir(cache_dir):
            os.makedirs(cache_dir)
        product.to_csv(dump_path, index=False)

    # NOTE(review): the original indentation was lost; this message is assumed
    # to be printed on both the cached and the freshly built path - confirm.
    # Parenthesised single-argument form prints the same on Python 2 and 3.
    print('finish get basic product info')
    return product
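# 评论列表 (comment list) - web-page navigation residue from the source post, not code
# 文章目录 (table of contents) - web-page navigation residue from the source post, not code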