def train_model_for_age(df):
    """Fill in missing ages (encoded as 0) with an XGBoost regressor.

    Rows whose ``age`` is non-zero are used to fit a regressor on the
    columns ``appCount``, ``gender``, ``education``, ``marriageStatus`` and
    ``haveBaby``. The fitted model then predicts ``age`` for every row where
    it is 0, and those predictions are written back into ``df``.

    Args:
        df: pandas DataFrame containing the ``age`` column and the five
            feature columns listed above. It is modified in place.

    Returns:
        A ``(df, xgb_reg)`` tuple: the same DataFrame object that was passed
        in (with zero ages replaced by predictions) and the fitted
        ``XGBRegressor``.

    Raises:
        ValueError: if no row has a known (non-zero) age, because there is
            nothing to train the regressor on.
    """
    feature_cols = [
        'appCount',
        'gender',
        'education',
        'marriageStatus',
        'haveBaby',
    ]
    # Age 0 is the sentinel for "unknown"; compute the mask once and reuse it
    # for the train/predict split and for the final write-back.
    missing = df['age'] == 0

    # Fail early with a clear message instead of fitting on zero rows.
    # ``all()`` is also True for an empty frame, which is covered here too.
    if missing.all():
        raise ValueError('no rows with a known (non-zero) age to train on')

    # ``.values`` replaces ``as_matrix()``, which was removed in pandas 1.0.
    X = df.loc[~missing, feature_cols].values
    y = df.loc[~missing, 'age'].values

    # Parenthesised single-argument print works on both Python 2 and 3.
    print('Train Xgboost Model(For Missing Age)...')
    start_time = datetime.datetime.now()
    xgb_reg = XGBRegressor(n_estimators=100, max_depth=3)
    xgb_reg.fit(X, y)
    end_time = datetime.datetime.now()
    # ``total_seconds()`` includes the days component that ``.seconds`` drops.
    elapsed = (end_time - start_time).total_seconds()
    print('Training Done..., Time Cost: %d' % elapsed)

    # Only predict when there is something to fill in; predicting on a
    # 0-row matrix is pointless and not guaranteed to be accepted.
    if missing.any():
        predicted_age = xgb_reg.predict(df.loc[missing, feature_cols].values)
        df.loc[missing, 'age'] = predicted_age

    return df, xgb_reg
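# Web-page navigation residue from the scraped article (not code):
# 评论列表 (comment list)
# 文章目录 (article table of contents)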