def train_model_for_appcounts(df):
    """Fill missing ``appCount`` values in *df* with XGBoost predictions.

    An ``XGBRegressor`` is fitted on the rows whose ``appCount`` is present,
    using ``age``, ``gender``, ``education``, ``marriageStatus`` and
    ``haveBaby`` as features. The fitted model then predicts ``appCount``
    for the rows where it is null, and those predictions are written back
    into ``df``.

    Args:
        df: pandas DataFrame containing an ``appCount`` column and the five
            feature columns listed above.

    Returns:
        A ``(df, xgb_reg)`` tuple: the same DataFrame object that was passed
        in (modified in place) and the fitted regressor.
    """
    columns = ['appCount', 'age', 'gender', 'education', 'marriageStatus', 'haveBaby']
    app_df = df[columns]

    # Compute the null mask once; app_df shares df's index, so the same mask
    # selects the training rows, the rows to predict, and the rows to fill.
    missing_mask = df['appCount'].isnull()

    # ``.values`` is the drop-in replacement for ``DataFrame.as_matrix()``,
    # which was deprecated and later removed from pandas.
    known_app = app_df[~missing_mask].values
    y = known_app[:, 0]   # target: appCount
    X = known_app[:, 1:]  # features: every remaining column

    print('Train Xgboost Model(For Missing AppCount)...')
    start_time = datetime.datetime.now()
    xgb_reg = XGBRegressor(n_estimators=100, max_depth=3)
    xgb_reg.fit(X, y)
    end_time = datetime.datetime.now()
    # total_seconds() keeps the days component that ``.seconds`` discards.
    print('Training Done..., Time Cost: %d' % (end_time - start_time).total_seconds())

    # Only predict when there is something to fill: avoids handing the model
    # a zero-row matrix when no appCount value is missing.
    if missing_mask.any():
        unknown_app = app_df[missing_mask].values
        df.loc[missing_mask, 'appCount'] = xgb_reg.predict(unknown_app[:, 1:])

    return df, xgb_reg
# (Web-page residue, not part of the code: "Comment list" / "Article table of contents".)