python类tree()的实例源码

machine_learning.py 文件源码 项目:-Python-Analysis_of_wine_quality 作者: ekolik 项目源码 文件源码 阅读 28 收藏 0 点赞 0 评论 0
def decis_tree(wine_set):
    """Fit a decision tree predicting binary wine quality from two features.

    Recodes the ``quality`` column into a binary ``quality_c`` target
    (3-5 -> 0, 6-9 -> 1), trains a DecisionTreeClassifier on
    ``residual_sugar`` and ``alcohol``, prints the confusion matrix and
    accuracy on a 40% hold-out split, and exports the fitted tree to a
    Graphviz .dot file named after the wine colour.

    NOTE(review): relies on module-level names `train_test_split`,
    `DecisionTreeClassifier`, `sklearn`, `export_graphviz` and the global
    DataFrame `red` -- assumed imported/defined elsewhere in this file.
    """
    # Snapshot the incoming frame BEFORE it is mutated below.
    # BUG FIX: the original used `w = wine_set` (an alias), so adding the
    # quality_c column also changed `w`, making `w.equals(red)` always
    # False -- every tree was written to white_decision_tree.dot.
    w = wine_set.copy()

    # subset data for better tree visibility
    # wine_set = wine_set[:100]

    # recode quality (response variable) into 2 groups: 0:{3,4,5}, 1:{6,7,8,9}
    recode = {3: 0, 4: 0, 5: 0, 6: 1, 7: 1, 8: 1, 9: 1}
    wine_set['quality_c'] = wine_set['quality'].map(recode)

    # split into training (60%) and testing (40%) sets
    predictors = wine_set[["residual_sugar", 'alcohol']]
    targets = wine_set.quality_c
    pred_train, pred_test, tar_train, tar_test = train_test_split(
        predictors, targets, test_size=.4)

    # build model on training data
    classifier = DecisionTreeClassifier()
    classifier = classifier.fit(pred_train, tar_train)

    predictions = classifier.predict(pred_test)

    # print the confusion matrix and accuracy of the model
    print(sklearn.metrics.confusion_matrix(tar_test, predictions))
    print(sklearn.metrics.accuracy_score(tar_test, predictions))

    # export the tree for viewing; `w` is the pre-mutation snapshot, so the
    # comparison with the global `red` frame is meaningful
    if w.equals(red):
        export_graphviz(classifier, out_file="red_decision_tree.dot")
    else:
        export_graphviz(classifier, out_file="white_decision_tree.dot")
    # to view the decision tree create a .pdf file from the created .dot file
    # by typing in the terminal from this directory: dot -Tpdf decision_tree.dot -o decision_tree.pdf
# print('----------------Decision Tree------------------------')
# call(decis_tree)


# ____________________________________Random Forests________________
random_forest.py 文件源码 项目:SinaWeiboSpider 作者: SuperSaiyanSSS 项目源码 文件源码 阅读 32 收藏 0 点赞 0 评论 0
def rand_forest_train(self):
    """Train and compare three classifiers telling humans from bots.

    Reads labelled samples from ``names.csv`` (features: similarity,
    platform, reputation, entropy; label: human_or_machine), fits a
    decision tree, a random forest and a gradient-boosting classifier,
    prints accuracy and a classification report for each, then predicts
    labels for the unlabelled rows in ``values.csv`` with the random
    forest.  The fitted models are stored on ``self`` for later use.
    """
    # Load the labelled training data.
    users = pd.read_csv('names.csv')
    # Feature matrix and target vector.
    X = users[['similarity', 'platform', 'reputation', 'entropy']]
    y = users['human_or_machine']

    # Hold out 25% of the samples for testing.
    # FIX: sklearn.cross_validation was removed in scikit-learn 0.20;
    # train_test_split now lives in sklearn.model_selection.
    from sklearn.model_selection import train_test_split
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.25, random_state=33)

    # Vectorize the per-row feature dicts into a dense numeric matrix.
    # FIX: the valid to_dict orientation is 'records' (plural);
    # 'record' raises ValueError on current pandas versions.
    from sklearn.feature_extraction import DictVectorizer
    vec = DictVectorizer(sparse=False)
    X_train = vec.fit_transform(X_train.to_dict(orient='records'))
    X_test = vec.transform(X_test.to_dict(orient='records'))

    # Single decision tree baseline.
    from sklearn.tree import DecisionTreeClassifier
    dtc = DecisionTreeClassifier()
    dtc.fit(X_train, y_train)
    dtc_y_pred = dtc.predict(X_test)

    # Random forest ensemble.
    from sklearn.ensemble import RandomForestClassifier
    rfc = RandomForestClassifier()
    rfc.fit(X_train, y_train)
    rfc_y_pred = rfc.predict(X_test)

    # Gradient-boosted trees ensemble.
    from sklearn.ensemble import GradientBoostingClassifier
    gbc = GradientBoostingClassifier()
    gbc.fit(X_train, y_train)
    gbc_y_pred = gbc.predict(X_test)

    from sklearn.metrics import classification_report
    # Accuracy plus precision/recall/F1 per class for each model.
    # (Print labels rewritten in English -- the originals were mojibake.)
    print("Decision tree accuracy:", dtc.score(X_test, y_test))
    print(classification_report(dtc_y_pred, y_test))

    print("Random forest accuracy:", rfc.score(X_test, y_test))
    print(classification_report(rfc_y_pred, y_test))

    print("Gradient boosting accuracy:", gbc.score(X_test, y_test))
    print(classification_report(gbc_y_pred, y_test))

    # Predict on the unlabelled data with the random forest.
    users = pd.read_csv('values.csv')
    X = users[['similarity', 'platform', 'reputation', 'entropy']]
    X = vec.transform(X.to_dict(orient='records'))
    print(rfc.predict(X))

    # Keep the fitted models for later calls.
    self.dtc = dtc
    self.rfc = rfc
    self.gbc = gbc
decision_tree_manual_classifier.py 文件源码 项目:SLIC_cityscapes 作者: wpqmanu 项目源码 文件源码 阅读 22 收藏 0 点赞 0 评论 0
def decision_tree_manual_classifier(all_feature_data):
    """Train both a hand-rolled and an sklearn decision tree on the data.

    ``all_feature_data`` is a pair ``[rows, labels]`` where ``rows`` is a
    list of feature lists and ``labels`` the matching class labels.
    Builds a manual tree with ``buildtree``, classifies every row with it,
    then fits an sklearn DecisionTreeClassifier on the same data, prints
    its training accuracy, dumps it to ``cityscapes.dot`` via Graphviz,
    and returns the fitted sklearn classifier.

    NOTE(review): relies on module-level names `buildtree`, `classify`,
    `DecisionTreeClassifier` and the sklearn `tree` module -- assumed
    imported/defined elsewhere in this file.
    """
    input_data = np.asarray(all_feature_data[0])
    label = np.asarray(all_feature_data[1])

    # Append each row's label to its feature list -- the format the
    # manual tree builder expects.
    data_for_manual_tree = []
    for row_index in range(len(all_feature_data[0])):
        current_row = all_feature_data[0][row_index] + [all_feature_data[1][row_index]]
        data_for_manual_tree.append(current_row)

    # BUG FIX: the original bound this result to the name `tree`,
    # shadowing the sklearn `tree` module and breaking the
    # tree.export_graphviz call below.
    manual_tree = buildtree(data_for_manual_tree)

    data = input_data[:, :]
    # data = sklearn.preprocessing.normalize(data, axis=0)

    # Run every row through the manual tree.
    # NOTE(review): predicted_label is overwritten each iteration and never
    # stored -- presumably kept only for debugging/side effects; confirm.
    for row_index in range(len(all_feature_data[0])):
        to_be_predicted_data = all_feature_data[0][row_index]
        predicted_label = classify(to_be_predicted_data, manual_tree)

    # Fit the sklearn tree on the full data set (training accuracy only).
    clf = DecisionTreeClassifier()
    fit_clf = clf.fit(data, label)

    result = fit_clf.predict(data)
    accuracy = float(np.sum(result == label)) / len(label)
    # print() form is valid in both Python 2 and 3 for a single argument,
    # and matches the rest of the file.
    print("Training accuracy is " + str(accuracy))
    with open("cityscapes.dot", 'w') as f:
        # No point rebinding `f` -- export_graphviz writes to the handle.
        tree.export_graphviz(clf, out_file=f)

    return fit_clf


问题


面经


文章

微信
公众号

扫码关注公众号