def decis_tree(wine_set):
    """Fit a decision tree predicting binned wine quality from sugar and alcohol.

    Prints the confusion matrix and accuracy on a 40% hold-out split and
    writes the fitted tree to a .dot file named after the wine colour.
    Note: adds a 'quality_c' column to the caller's DataFrame in place.
    """
    # keep a handle on the incoming frame so we can tell red from white below
    w = wine_set

    # recode quality (response variable) into 2 groups: 0:{3,4,5}, 1:{6,7,8,9}
    recode = {3: 0, 4: 0, 5: 0, 6: 1, 7: 1, 8: 1, 9: 1}
    wine_set['quality_c'] = wine_set['quality'].map(recode)

    # split into training and testing sets (60/40)
    predictors = wine_set[["residual_sugar", 'alcohol']]
    targets = wine_set.quality_c
    pred_train, pred_test, tar_train, tar_test = train_test_split(predictors, targets, test_size=.4)

    # build model on training data and predict on the hold-out set
    classifier = DecisionTreeClassifier().fit(pred_train, tar_train)
    predictions = classifier.predict(pred_test)

    # print the confusion matrix and accuracy of the model
    print(sklearn.metrics.confusion_matrix(tar_test, predictions))
    print(sklearn.metrics.accuracy_score(tar_test, predictions))

    # export the tree for viewing; `red` is the module-level red-wine frame.
    # To render: dot -Tpdf <name>_decision_tree.dot -o <name>_decision_tree.pdf
    if w.equals(red):
        export_graphviz(classifier, out_file="red_decision_tree.dot")
    else:
        export_graphviz(classifier, out_file="white_decision_tree.dot")
# ____________________________________Random Forests________________
python类tree()的实例源码
machine_learning.py 文件源码
项目:-Python-Analysis_of_wine_quality
作者: ekolik
项目源码
文件源码
阅读 28
收藏 0
点赞 0
评论 0
def rand_forest_train(self):
    """Train tree, random-forest and gradient-boosting classifiers.

    Reads labelled samples from names.csv, reports hold-out accuracy and a
    precision/recall/F1 report for each model, predicts labels for the
    unlabelled rows of values.csv with the random forest, and stores the
    fitted models on self (self.dtc, self.rfc, self.gbc).
    """
    # load the labelled training data
    users = pd.read_csv('names.csv')
    # use similarity, platform, reputation and entropy as the features
    X = users[['similarity', 'platform', 'reputation', 'entropy']]
    y = users['human_or_machine']

    # hold out 25% of the data for testing.
    # NOTE: sklearn.cross_validation was removed in scikit-learn 0.20;
    # train_test_split now lives in sklearn.model_selection.
    from sklearn.model_selection import train_test_split
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=33)

    # vectorize the per-row feature dicts.
    # NOTE: orient must be 'records' — the abbreviated 'record' spelling is
    # rejected by modern pandas.
    from sklearn.feature_extraction import DictVectorizer
    vec = DictVectorizer(sparse=False)
    X_train = vec.fit_transform(X_train.to_dict(orient='records'))
    X_test = vec.transform(X_test.to_dict(orient='records'))

    # single decision tree
    from sklearn.tree import DecisionTreeClassifier
    dtc = DecisionTreeClassifier()
    dtc.fit(X_train, y_train)
    dtc_y_pred = dtc.predict(X_test)

    # random forest ensemble
    from sklearn.ensemble import RandomForestClassifier
    rfc = RandomForestClassifier()
    rfc.fit(X_train, y_train)
    rfc_y_pred = rfc.predict(X_test)

    # gradient boosted trees
    from sklearn.ensemble import GradientBoostingClassifier
    gbc = GradientBoostingClassifier()
    gbc.fit(X_train, y_train)
    gbc_y_pred = gbc.predict(X_test)

    from sklearn.metrics import classification_report
    # hold-out accuracy plus per-class precision/recall/F1 for each model
    # (original labels were mojibake '??????' strings from a lost encoding)
    print("Decision tree accuracy:", dtc.score(X_test, y_test))
    print(classification_report(dtc_y_pred, y_test))
    print("Random forest accuracy:", rfc.score(X_test, y_test))
    print(classification_report(rfc_y_pred, y_test))
    print("Gradient boosting accuracy:", gbc.score(X_test, y_test))
    print(classification_report(gbc_y_pred, y_test))

    # predict labels for the unlabelled data using the random forest
    users = pd.read_csv('values.csv')
    X = users[['similarity', 'platform', 'reputation', 'entropy']]
    X = vec.transform(X.to_dict(orient='records'))
    print(rfc.predict(X))

    # keep the fitted models for later use by other methods
    self.dtc = dtc
    self.rfc = rfc
    self.gbc = gbc
decision_tree_manual_classifier.py 文件源码
项目:SLIC_cityscapes
作者: wpqmanu
项目源码
文件源码
阅读 22
收藏 0
点赞 0
评论 0
def decision_tree_manual_classifier(all_feature_data):
    """Train a hand-rolled tree and a sklearn decision tree on the same data.

    Parameters
    ----------
    all_feature_data : pair (features, labels); features is a list of
        feature rows and labels the matching class labels.

    Returns
    -------
    The fitted sklearn DecisionTreeClassifier. Side effects: prints the
    training accuracy and writes the sklearn tree to cityscapes.dot.
    """
    input_data = np.asarray(all_feature_data[0])
    label = np.asarray(all_feature_data[1])

    # append each row's label as the last column: buildtree() reads the
    # class from the final position
    data_for_manual_tree = [
        features + [target]
        for features, target in zip(all_feature_data[0], all_feature_data[1])
    ]

    # build the manual tree. Named manual_tree, NOT tree: the original
    # rebinding of `tree` shadowed the sklearn `tree` module, so the
    # tree.export_graphviz(...) call below raised AttributeError.
    manual_tree = buildtree(data_for_manual_tree)

    # run the manual classifier over every training row; the predictions
    # were discarded in the original too — this only exercises classify()
    for row in all_feature_data[0]:
        classify(row, manual_tree)

    # sklearn tree trained on the same data, with training accuracy
    clf = DecisionTreeClassifier()
    fit_clf = clf.fit(input_data, label)
    result = fit_clf.predict(input_data)
    accuracy = float(np.sum(result == label)) / len(label)
    # print() call (not Py2 print statement) for consistency with the rest
    # of the file
    print("Training accuracy is " + str(accuracy))

    # export the sklearn tree for viewing, using the module-level
    # export_graphviz already used elsewhere in this file
    with open("cityscapes.dot", 'w') as f:
        export_graphviz(clf, out_file=f)
    return fit_clf