def train_predictor(df, markov_blanket, p_train=0.6, dot_path=r'D:\MLmaster\Tree.dot'):
    """Train and evaluate a decision tree, then return a fitted random forest.

    Splits *df* chronologically (first ``p_train`` fraction = train, rest =
    out-of-sample check), fits a DecisionTreeClassifier on the markov-blanket
    features, prints in-sample / out-of-sample accuracy and confusion
    matrices, exports the tree as a Graphviz .dot file, and returns a
    RandomForestClassifier fitted on the same training partition.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the feature columns named in *markov_blanket* and the
        target column "TAR10" (labels 0/1; class names suggest
        0 = "lateral", 1 = "alcista" — TODO confirm against the labeler).
    markov_blanket : iterable of str
        Feature column names to train on.
    p_train : float, optional
        Fraction of rows (taken from the start, no shuffling) used for
        training; the remainder is the out-of-sample check set.
    dot_path : str, optional
        Destination file for the exported Graphviz tree.

    Returns
    -------
    sklearn.ensemble.RandomForestClassifier
        Fitted on the training partition.
    """
    xnames = list(markov_blanket)
    x = df[xnames].values
    y = df["TAR10"].values

    # Chronological split — no shuffle, so ordering in df is preserved
    # (appropriate for time-series-style data).
    n_train = int(np.round(p_train * x.shape[0]))
    xt, yt = x[:n_train, :], y[:n_train]
    xc, yc = x[n_train:, :], y[n_train:]

    clf1 = tree.DecisionTreeClassifier(max_leaf_nodes=10, class_weight=None)
    clf1.fit(xt, yt)
    sys.stdout.write("Result INS is {}\n".format(clf1.score(xt, yt)))
    sys.stdout.write("Result OOS is {}\n".format(clf1.score(xc, yc)))

    # Confusion matrices: in-sample first, then out-of-sample.
    print(confusion_matrix(yt, clf1.predict(xt), labels=[0, 1]))
    print(confusion_matrix(yc, clf1.predict(xc), labels=[0, 1]))

    ynames = ["lateral", "alcista"]
    tree.export_graphviz(clf1, out_file=dot_path,
                         class_names=ynames, feature_names=xnames)

    # BUG FIX: the original created the forest but never fitted it, then
    # returned it — the caller received an untrained model. Fit it on the
    # same training partition before returning.
    rf = RandomForestClassifier(n_estimators=5)
    rf.fit(xt, yt)
    return rf