def generate_XGB_model(train_df):
    """Train the main XGBoost model on ``train_df`` and return it.

    Steps, in order:

    1. Drop the ``conversionTime`` column from ``train_df`` in place, so
       the caller's DataFrame is modified.
    2. Call ``train_model_for_appcounts``, which returns a DataFrame and
       a model; the model is saved to ``XGB_missing.model`` via joblib.
    3. Drop ``marriageStatus``, ``haveBaby``, ``sitesetID`` and
       ``positionType`` from the returned DataFrame, in place.
    4. Print diagnostics (info, describe, null counts).
    5. Fit an ``XGBRegressor`` using column 0 as the target and every
       remaining column as a feature, then print feature importances.

    Args:
        train_df: pandas DataFrame containing at least the columns
            dropped above. After preprocessing, its first column is used
            as the target.

    Returns:
        The fitted ``XGBRegressor`` instance.
    """
    train_df.drop(['conversionTime'], axis=1, inplace=True)

    print('Train And Fix Missing App Count Value...')
    train_df, xgb_appcount = train_model_for_appcounts(train_df)
    joblib.dump(xgb_appcount, 'XGB_missing.model')

    # Age imputation is currently disabled; kept here for reference:
    # print 'Train And Fix Missing Age Value...'
    # train_df, xgb_age = train_model_for_age(train_df)
    # joblib.dump(xgb_age, 'XGB_age.model')

    train_df.drop(['marriageStatus', 'haveBaby', 'sitesetID', 'positionType'],
                  axis=1, inplace=True)
    print('Done')

    # DataFrame.info() writes its report itself and returns None, so it
    # must not be wrapped in print (that would emit a stray "None" line).
    train_df.info()
    print(train_df.describe())
    print(train_df.isnull().sum())

    # .values is the equivalent of the deprecated as_matrix().
    train_np = train_df.values
    y = train_np[:, 0]
    X = train_np[:, 1:]

    print('Train Xgboost Model...')
    start_time = datetime.datetime.now()
    # NOTE(review): a regressor with the binary:logistic objective
    # presumably yields probabilities from predict() -- confirm intent.
    xgb_clf = XGBRegressor(n_estimators=100, max_depth=6,
                           objective="binary:logistic", silent=False)
    xgb_clf.fit(X, y)
    elapsed = datetime.datetime.now() - start_time
    # total_seconds() includes the days component that .seconds drops.
    print('Training Done..., Time Cost: %d' % elapsed.total_seconds())

    # Pair each feature column (everything but the target) with its importance.
    model_df = pd.DataFrame({
        'columns': list(train_df.columns)[1:],
        'values': xgb_clf.feature_importances_,
    })
    print(model_df)
    return xgb_clf
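# Comment list (web-page navigation residue; not part of the code)
# Article table of contents (web-page navigation residue; not part of the code)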