def generate_LR_model(file_name):
    """Train a logistic regression classifier and save it to 'LR.model'.

    The training table is loaded with ``read_from_file(file_name)`` and
    reduced to the columns whose names match the feature regex below.
    Column 0 of the resulting matrix is used as the target and the
    remaining columns as features. The elapsed fitting time (whole
    seconds) is printed, the fitted model is dumped with ``joblib`` to
    ``'LR.model'`` in the current working directory, and then returned.

    Args:
        file_name: Passed unchanged to ``read_from_file``.

    Returns:
        The fitted ``linear_model.LogisticRegression`` instance.
    """
    train_df = read_from_file(file_name)
    selected_train_df = train_df.filter(regex='label|connectionType_.*|telecomsOperator_.*|sitesetID_.*|positionType_.*|gender_.*|haveBaby_.*|age_scaled')

    # `.values` replaces `as_matrix()`, which was deprecated in pandas 0.23
    # and removed in 1.0; both return the same ndarray.
    train_np = selected_train_df.values

    # NOTE(review): `filter` keeps the DataFrame's own column order, so this
    # slicing presumably relies on 'label' being the first selected column
    # -- confirm against what read_from_file produces.
    y = train_np[:, 0]
    X = train_np[:, 1:]

    # Single-argument print(...) behaves identically on Python 2 and 3.
    print('Train Logistic Regression Model...')
    start_time = datetime.datetime.now()
    # class_weight='balanced' was commented out in the original call and is
    # left disabled here.
    clf = linear_model.LogisticRegression(
        penalty='l2',
        C=1.0,
        solver='sag',
        n_jobs=-1,
        tol=1e-6,
        max_iter=200,
    )
    clf.fit(X, y)
    elapsed = datetime.datetime.now() - start_time

    print('Training Done..., Time Cost: ')
    # timedelta.seconds ignores the `days` field; total_seconds() does not.
    # int() keeps the printed value a whole number of seconds as before.
    print(int(elapsed.total_seconds()))

    print('Save Model...')
    joblib.dump(clf, 'LR.model')
    return clf
评论列表
文章目录