def random_forests(wine_set):
# recode quality (response variable) into 2 groups: 0:{3,4,5}, 1:{6,7,8,9}
recode = {3: 0, 4: 0, 5: 0, 6: 1, 7: 1, 8: 1, 9: 1}
wine_set['quality_c'] = wine_set['quality'].map(recode)
# split into training and testing sets
predictors = wine_set[["density", 'alcohol', 'sulphates', 'pH', 'volatile_acidity', 'chlorides', 'fixed_acidity',
'citric_acid', 'residual_sugar', 'free_sulfur_dioxide', 'total_sulfur_dioxide']]
targets = wine_set.quality_c
pred_train, pred_test, tar_train, tar_test = train_test_split(predictors, targets, test_size=.4)
# build model on training data#
classifier = RandomForestClassifier(n_estimators=25)
classifier = classifier.fit(pred_train, tar_train)
predictions = classifier.predict(pred_test)
# print the confusion matrix and accuracy of the model
print('confusion matrix:\n', sklearn.metrics.confusion_matrix(tar_test, predictions))
print('\naccuracy:', sklearn.metrics.accuracy_score(tar_test, predictions))
# to display the relative importance of each predictive variable
model = ExtraTreesClassifier()
model.fit(pred_train, tar_train)
print('importance of predictors:')
dct = dict()
for c in range(len(predictors.columns)):
dct[predictors.columns[c]] = model.feature_importances_[c]
print(sorted(dct.items(), key=operator.itemgetter(1), reverse=True))
# run different numbers of trees to see the effect of the number on the accuracy of the prediction
n = 100
accuracy = [0]*n
for i in range(n):
classifier = RandomForestClassifier(n_estimators=i+1)
classifier = classifier.fit(pred_train, tar_train)
predictions = classifier.predict(pred_test)
accuracy[i] = sklearn.metrics.accuracy_score(tar_test, predictions)
plt.plot(range(1, n+1), accuracy)
plt.xlabel("Number of trees")
plt.ylabel("Accuracy of prediction")
plt.title("Effect of the number of trees on the prediction accuracy")
plt.show()
print(accuracy)
# print('----------------Random Forests------------------------')
# call(random_forests)
# ________________________________Lasso Regression__________________________________
machine_learning.py 文件源码
python
阅读 30
收藏 0
点赞 0
评论 0
评论列表
文章目录