def generate_RF_model(file_name):
    """Train a random forest regressor on the data in *file_name* and save it.

    The data is loaded with ``read_from_file``, reduced to the columns whose
    names match the label/feature regex below, and converted to a NumPy
    array. Column 0 of that array is used as the regression target and all
    remaining columns as features. The fitted model is written with
    ``joblib.dump`` to the relative path ``'RF.model'`` and also returned.

    Args:
        file_name: Passed straight through to ``read_from_file``.

    Returns:
        The fitted ``RandomForestRegressor``.
    """
    train_df = read_from_file(file_name)

    # The pattern is an unanchored alternation, so any column whose name
    # merely contains one of these tokens is kept. The spelling 'camgaignID'
    # is intentional-looking: it is matched against column names, so it is
    # left untouched -- presumably it mirrors the dataset header; verify.
    selected_train_df = train_df.filter(
        regex='label|creativeID|positionID|connectionType|telecomsOperator|adID|camgaignID|advertiserID|appID|appPlatform|sitesetID|positionType|age|gender|education|marriageStatus|haveBaby|hometown|residence'
    )

    # `.values` replaces DataFrame.as_matrix(), which was deprecated and then
    # removed from pandas; `.values` is available on old and new versions.
    train_np = selected_train_df.values

    # NOTE(review): the target is taken positionally, which assumes the
    # label is the first column that survives the filter -- confirm against
    # the layout produced by read_from_file.
    y = train_np[:, 0]
    X = train_np[:, 1:]

    print('Train Random Forest Regression Model...')
    start_time = datetime.datetime.now()
    # n_jobs=-1 asks scikit-learn to use all available cores. The original
    # call had a class_weight='balanced' argument commented out; it is not
    # passed here either.
    rf = RandomForestRegressor(n_estimators=25, n_jobs=-1)
    rf.fit(X, y)
    end_time = datetime.datetime.now()

    print('Training Done..., Time Cost: ')
    # timedelta.seconds only holds the sub-day remainder and would
    # under-report any run longer than 24 hours; total_seconds() covers the
    # whole interval. int() keeps the whole-second output format.
    print(int((end_time - start_time).total_seconds()))

    print('Save Model...')
    joblib.dump(rf, 'RF.model')
    return rf
评论列表
文章目录