def generate_GBDT_model(file_name):
    """Train a gradient boosting regressor on the data in *file_name* and save it.

    The data is loaded with ``read_from_file``, reduced to the ``label``
    column plus the feature columns selected below, and used to fit a
    ``GradientBoostingRegressor`` (120 estimators, max depth 10). The fitted
    model is written to ``GBDT.model`` in the current working directory with
    ``joblib.dump`` and also returned.

    Args:
        file_name: Passed unchanged to ``read_from_file``; presumably the path
            of the training data file, and the result is presumably a pandas
            DataFrame (it is used via ``.filter(regex=...)``) -- TODO confirm.

    Returns:
        The fitted ``GradientBoostingRegressor`` instance.

    Raises:
        KeyError: If the loaded data has no column named ``label``.
    """
    train_df = read_from_file(file_name)

    # Keep the target plus 18 feature columns.
    # NOTE: DataFrame.filter(regex=...) matches with re.search, so every
    # alternative is a substring match (e.g. 'age' also selects any other
    # column whose name contains "age").
    # NOTE(review): 'camgaignID' looks like a typo but is kept as is, since it
    # has to match the column name in the data -- confirm against the dataset.
    selected_train_df = train_df.filter(
        regex='label|creativeID|positionID|connectionType|telecomsOperator'
              '|adID|camgaignID|advertiserID|appID|appPlatform|sitesetID'
              '|positionType|age|gender|education|marriageStatus|haveBaby'
              '|hometown|residence'
    )

    # Pick the target by name instead of assuming it is column 0: filter()
    # keeps the frame's original column order, so a positional split would
    # silently train on the wrong target if 'label' were not first.
    # `.values` replaces `as_matrix()`, which was removed in pandas 1.0.
    y = selected_train_df['label'].values
    X = selected_train_df.drop('label', axis=1).values

    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print('Train Gradient Boosting Regression Model...')
    start_time = datetime.datetime.now()
    gbdt = GradientBoostingRegressor(n_estimators=120, max_depth=10)
    gbdt.fit(X, y)
    elapsed = datetime.datetime.now() - start_time

    print('Training Done..., Time Cost: ')
    # total_seconds() covers the whole duration; `.seconds` is only the
    # seconds component and wraps around once training exceeds 24 hours.
    print(int(elapsed.total_seconds()))

    print('Save Model...')
    joblib.dump(gbdt, 'GBDT.model')
    return gbdt
评论列表
文章目录