def predict_sku(top_n=500):
    """Score candidate (user, SKU) pairs and write the submission file.

    Loads the XGBoost booster stored at ``./model/xgb_sku.model``, scores
    every row of ``./feat/online_sku_feat.csv`` and keeps, for each user,
    the SKU row(s) with that user's highest score.  The submission is the
    union of

    * the ``top_n`` highest-scoring (user, SKU) rows, and
    * the best SKU row(s) of the ``top_n`` users that rank highest by the
      ``proba`` column of ``./online_user_proba.csv``,

    reduced to one row per ``user_id`` (first occurrence wins) and written
    to ``online_submit.csv`` with the columns ``user_id`` and ``sku_id``
    cast to ``int``.

    Args:
        top_n: Number of rows taken from each of the two rankings.  The
            default of 500 is the value that used to be hard-coded.

    Returns:
        None.  The result is written to disk.
    """
    # NOTE(review): both status messages look mis-encoded in the source; they
    # are runtime output, so they are kept exactly as found.
    print('????.')

    booster = xgb.Booster({'nthread': -1})
    booster.load_model('./model/xgb_sku.model')

    online = pd.read_csv("./feat/online_sku_feat.csv")
    # These four columns are removed before building the DMatrix; every
    # remaining column is passed to the model.
    non_feature_cols = ['user_id', 'sku_id', 'cate', 'brand']
    features = xgb.DMatrix(online.drop(non_feature_cols, axis=1))
    scores = booster.predict(features)

    # assign() returns a new frame, so the score column is never written
    # through a slice of `online` (no SettingWithCopyWarning).
    sku_proba = online[['user_id', 'sku_id']].assign(sku_proba=scores)

    # Keep each user's top-scoring row(s); ties are kept.  A transform-based
    # mask is used instead of groupby().apply() so the selection does not
    # depend on the grouping column being handed to the applied function.
    user_best = sku_proba.groupby('user_id')['sku_proba'].transform('max')
    sku_proba = sku_proba[sku_proba['sku_proba'] == user_best]
    # Stable sort by user_id: rows end up grouped by user in ascending order
    # with their original relative order inside each user.
    sku_proba = sku_proba.sort_values(by='user_id', kind='mergesort')
    sku_proba = sku_proba.reset_index(drop=True)

    user_proba = pd.read_csv("./online_user_proba.csv")

    # Rank both tables from most to least likely.
    sku_proba = sku_proba.sort_values(by="sku_proba", ascending=False)
    user_proba = user_proba.sort_values(by="proba", ascending=False)

    top_users = user_proba.iloc[:top_n]
    top_sku = sku_proba.iloc[:top_n][['user_id', 'sku_id']]

    # sku_proba already holds only per-user maxima, so no second max filter
    # is needed here; the stable sort just orders the rows by user_id.
    is_top_user = sku_proba['user_id'].isin(top_users['user_id'])
    top_user_sku = sku_proba.loc[is_top_user, ['user_id', 'sku_id']]
    top_user_sku = top_user_sku.sort_values(by='user_id', kind='mergesort')

    # Rows from the SKU ranking come first, so they win when a user appears
    # in both selections.  Keeping the first row per user also removes any
    # exact duplicate rows.
    pred = pd.concat([top_sku, top_user_sku])
    pred = pred.drop_duplicates(subset='user_id', keep='first')
    pred.astype(int).to_csv("online_submit.csv", index=False)
    print('????.')
GenerateResult.py 文件源码
python
阅读 20
收藏 0
点赞 0
评论 0
评论列表
文章目录