def build_model(train_file, test_file, attr_file, model_out, predictions_out, algorithm='ridge'):
    """Fit a regressor on the training set, evaluate it on the test set, and save both.

    The list returned by ``read_attrs(attr_file)`` supplies the column names:
    its first entry is used as the target and the remaining entries as the
    feature columns. The fitted estimator is pickled to ``model_out``; the
    test rows (target + feature columns) plus a ``'predicted'`` column are
    pickled to ``predictions_out``. A short report is printed to stdout.

    Args:
        train_file: Path passed to ``pd.read_pickle`` for the training data.
        test_file: Path passed to ``pd.read_pickle`` for the test data.
        attr_file: Path passed to ``read_attrs`` to obtain the column names.
        model_out: Path the fitted estimator is pickled to.
        predictions_out: Path the prediction table is pickled to.
        algorithm: One of ``'ridge'``, ``'linear'``, ``'lasso'``, ``'rf'``,
            ``'en'``; selects the estimator class (default ``'ridge'``).

    Raises:
        NotImplementedError: If ``algorithm`` is not a supported name.
    """
    classifiers = ['ridge', 'linear', 'lasso', 'rf', 'en']
    if algorithm not in classifiers:
        raise NotImplementedError("only implemented algorithms: " + str(classifiers))

    # NOTE(review): read_pickle unpickles arbitrary objects; only point this
    # at files from a trusted source.
    train_data = pd.read_pickle(train_file)
    attrs = read_attrs(attr_file)
    target_attr = attrs[0]
    usable_attrs = attrs[1:]

    # Kept as an if/elif chain so only the selected estimator name is looked up.
    if algorithm == 'ridge':
        clf = Ridge()
    elif algorithm == 'linear':
        clf = LinearRegression()
    elif algorithm == 'lasso':
        clf = Lasso()
    elif algorithm == 'en':
        clf = ElasticNet()
    else:
        # Only 'rf' can reach here because of the membership check above.
        clf = RandomForestRegressor()
    clf.fit(train_data[usable_attrs], train_data[target_attr])

    test_data = pd.read_pickle(test_file)
    predictions = clf.predict(test_data[usable_attrs])
    errors = predictions - test_data[target_attr]
    squared_errors = errors ** 2

    prediction_results = test_data[[target_attr] + usable_attrs].copy()
    prediction_results['predicted'] = predictions
    prediction_results.to_pickle(predictions_out)

    # Each print takes one pre-formatted string, so the output is the same
    # whether print is the Python 2 statement or the Python 3 function.
    print("Modeling '%s'" % target_attr)
    print(" Train: %s (%d examples)" % (train_file, len(train_data)))
    print(" Test: %s (%d examples)" % (test_file, len(test_data)))
    print("Algorithm: %s" % algorithm)

    # Estimators without coefficients (e.g. the random forest) skip this part.
    if hasattr(clf, 'coef_'):
        print('Coefficients:')
        for attr_name, coef in zip(usable_attrs, clf.coef_):
            print(' %-20s: %20.4f' % (attr_name, coef))

    print('MSE : %10.4f' % np.mean(squared_errors))
    print('medSE: %10.4f' % np.median(squared_errors))
    print('SSE : %10.4f' % np.sum(squared_errors))
    print('Variance score: %.4f' % clf.score(test_data[usable_attrs], test_data[target_attr]))

    # The context manager guarantees the model file is flushed and closed.
    with open(model_out, 'wb') as model_fh:
        pickle.dump(clf, model_fh)
# Example source code for the Python class ElasticNet()
def getSKLearnModel(modelName):
    """Return a new, default-constructed estimator selected by name.

    Args:
        modelName: Name of the estimator to build. Recognised values are
            'LinearRegression', 'BayesianRidge', 'ARDRegression',
            'ElasticNet', 'HuberRegressor', 'Lasso', 'LassoLars', 'Ridge'
            (the historical misspelling 'Rigid' is still accepted), 'SGDRegressor',
            'SVR', 'MLPClassifier', 'KNeighborsClassifier', 'SVC',
            'GaussianProcessClassifier', 'DecisionTreeClassifier',
            'RandomForestClassifier', 'AdaBoostClassifier', 'GaussianNB',
            'LogisticRegression' and 'QuadraticDiscriminantAnalysis'.

    Returns:
        An unfitted estimator instance built with no constructor arguments.

    Raises:
        ValueError: If ``modelName`` is not one of the recognised names.
    """
    # An if/elif chain (rather than a dict of classes) means only the chosen
    # estimator's name has to be importable in this module.
    if modelName == 'LinearRegression':
        model = linear_model.LinearRegression()
    elif modelName == 'BayesianRidge':
        model = linear_model.BayesianRidge()
    elif modelName == 'ARDRegression':
        model = linear_model.ARDRegression()
    elif modelName == 'ElasticNet':
        model = linear_model.ElasticNet()
    elif modelName == 'HuberRegressor':
        model = linear_model.HuberRegressor()
    elif modelName == 'Lasso':
        model = linear_model.Lasso()
    elif modelName == 'LassoLars':
        model = linear_model.LassoLars()
    elif modelName in ('Ridge', 'Rigid'):
        # 'Rigid' was the only key originally accepted for Ridge; keep it so
        # existing callers continue to work.
        model = linear_model.Ridge()
    elif modelName == 'SGDRegressor':
        model = linear_model.SGDRegressor()
    elif modelName == 'SVR':
        model = SVR()
    elif modelName == 'MLPClassifier':
        model = MLPClassifier()
    elif modelName == 'KNeighborsClassifier':
        model = KNeighborsClassifier()
    elif modelName == 'SVC':
        model = SVC()
    elif modelName == 'GaussianProcessClassifier':
        model = GaussianProcessClassifier()
    elif modelName == 'DecisionTreeClassifier':
        model = DecisionTreeClassifier()
    elif modelName == 'RandomForestClassifier':
        model = RandomForestClassifier()
    elif modelName == 'AdaBoostClassifier':
        model = AdaBoostClassifier()
    elif modelName == 'GaussianNB':
        model = GaussianNB()
    elif modelName == 'LogisticRegression':
        model = linear_model.LogisticRegression()
    elif modelName == 'QuadraticDiscriminantAnalysis':
        model = QuadraticDiscriminantAnalysis()
    else:
        # Previously this fell through to `return model` with `model` unbound.
        raise ValueError("unknown model name: %r" % (modelName,))
    return model