def xgb_model_select(train_file_name):
    """Grid-search XGBoost regressor hyper-parameters on the training data.

    Loads the training frame via ``merge_features_to_use``, drops the
    ``conversionTime`` column, runs ``train_model_for_appcounts`` on it
    (persisting the model it returns to ``XGB_missing.model``), and then
    runs a 10-fold ``GridSearchCV`` over ``n_estimators``, ``max_depth``,
    ``gamma`` and ``min_child_weight``.  The best score, the best value of
    every searched parameter, and the elapsed time are printed.

    The first column of the prepared frame is used as the target and all
    remaining columns as features.

    Args:
        train_file_name: Passed straight through to
            ``merge_features_to_use``.

    Returns:
        None.  Results are reported on stdout only.
    """
    train_df = merge_features_to_use(train_file_name)
    train_df.drop(['conversionTime'], axis=1, inplace=True)

    print('Train And Fix Missing App Count Value...')
    train_df, xgb_appcount = train_model_for_appcounts(train_df)
    joblib.dump(xgb_appcount, 'XGB_missing.model')

    # DataFrame.info() writes its report itself and returns None, so it is
    # called directly instead of being printed (which emitted a stray "None").
    train_df.info()
    print(train_df.describe())
    print(train_df.isnull().sum())

    # `.values` replaces the deprecated `as_matrix()` (removed in pandas 1.0)
    # and yields the same ndarray.
    train_np = train_df.values
    y = train_np[:, 0]
    X = train_np[:, 1:]

    print('Select Model...')
    start_time = datetime.datetime.now()
    xgb_clf = xgb.XGBRegressor()
    parameters = {
        'n_estimators': [120, 100, 140],
        'max_depth': [3, 5, 7, 9],
        'gamma': [0.1, 0.3, 0.5, 0.7],
        'min_child_weight': [1, 3, 5, 7],
    }
    grid_search = GridSearchCV(
        estimator=xgb_clf, param_grid=parameters, cv=10, n_jobs=-1
    )
    print("parameters:")
    pprint.pprint(parameters)
    grid_search.fit(X, y)

    print("Best score: %0.3f" % grid_search.best_score_)
    print("Best parameters set:")
    best_parameters = grid_search.best_estimator_.get_params()
    for param_name in sorted(parameters):
        print("\t%s: %r" % (param_name, best_parameters[param_name]))

    end_time = datetime.datetime.now()
    # total_seconds() covers the whole interval; `.seconds` is only the
    # seconds component and silently drops full days on long searches.
    elapsed_seconds = (end_time - start_time).total_seconds()
    print('Select Done..., Time Cost: %d' % elapsed_seconds)
评论列表
文章目录