def train_predictor(df, markov_blanket, p_train=0.6, dot_path=r'D:\MLmaster\Tree.dot'):
    """Train and evaluate a decision tree, then return a fitted random forest.

    Splits *df* chronologically (first ``p_train`` fraction = train, rest =
    out-of-sample check), fits a DecisionTreeClassifier on the markov-blanket
    features, prints in-sample / out-of-sample accuracy and confusion
    matrices, exports the tree as a Graphviz .dot file, and returns a
    RandomForestClassifier fitted on the same training partition.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the feature columns named in *markov_blanket* and the
        target column "TAR10" (labels 0/1; class names suggest
        0 = "lateral", 1 = "alcista" — TODO confirm against the labeler).
    markov_blanket : iterable of str
        Feature column names to train on.
    p_train : float, optional
        Fraction of rows (taken from the start, no shuffling) used for
        training; the remainder is the out-of-sample check set.
    dot_path : str, optional
        Destination file for the exported Graphviz tree.

    Returns
    -------
    sklearn.ensemble.RandomForestClassifier
        Fitted on the training partition.
    """
    xnames = list(markov_blanket)
    x = df[xnames].values
    y = df["TAR10"].values

    # Chronological split — no shuffle, so ordering in df is preserved
    # (appropriate for time-series-style data).
    n_train = int(np.round(p_train * x.shape[0]))
    xt, yt = x[:n_train, :], y[:n_train]
    xc, yc = x[n_train:, :], y[n_train:]

    clf1 = tree.DecisionTreeClassifier(max_leaf_nodes=10, class_weight=None)
    clf1.fit(xt, yt)
    sys.stdout.write("Result INS is {}\n".format(clf1.score(xt, yt)))
    sys.stdout.write("Result OOS is {}\n".format(clf1.score(xc, yc)))

    # Confusion matrices: in-sample first, then out-of-sample.
    print(confusion_matrix(yt, clf1.predict(xt), labels=[0, 1]))
    print(confusion_matrix(yc, clf1.predict(xc), labels=[0, 1]))

    ynames = ["lateral", "alcista"]
    tree.export_graphviz(clf1, out_file=dot_path,
                         class_names=ynames, feature_names=xnames)

    # BUG FIX: the original created the forest but never fitted it, then
    # returned it — the caller received an untrained model. Fit it on the
    # same training partition before returning.
    rf = RandomForestClassifier(n_estimators=5)
    rf.fit(xt, yt)
    return rf