def classify(y, x, test_y, test_x):
    """Fit a depth-limited decision tree on three-way labels of ``y`` and print a report.

    ``y`` is bucketed into three classes: ``1`` where ``y > 0.02``, ``-1``
    where ``y < -0.02`` and ``0`` otherwise. A ``DecisionTreeClassifier``
    (``max_depth=4``, leaves holding at least ~5% of the samples) is fitted
    on ``x`` against those labels.

    Side effects:
        * Writes the Graphviz export of the tree to ``D:\\Cache\\tree.txt`` and
          passes that path, together with the module-level ``factor_name``,
          to ``factor_exchange``.
        * Rebinds the module-level globals ``left``, ``right``, ``feature``
          and ``threshold`` to the fitted tree's arrays, then calls
          ``disp_tree()`` (presumably it reads those globals -- confirm).
        * Prints summary statistics to stdout.

    Args:
        y: 1-D numeric target values, one per row of ``x``.
        x: Feature matrix used for both fitting and prediction.
        test_y: Currently ignored -- it is overwritten with ``y`` (see the
            review note in the body).
        test_x: Currently unused.

    Returns:
        None.
    """
    # Only names that are rebound here need a ``global`` declaration;
    # ``factor_name`` is merely read from module scope.
    global left, right, feature, threshold

    # Accept plain sequences as well as arrays for the comparisons below.
    y = np.asarray(y)

    # Three-way labels: +1 above +0.02, -1 below -0.02, 0 in between.
    y_c = np.zeros(len(y))
    y_c[y > 0.02] = 1
    y_c[y < -0.02] = -1

    # Each leaf must hold about 5% of the samples. For fewer than 20 samples
    # the integer truncates to 0, which scikit-learn rejects, so clamp to 1.
    min_n = max(1, int(0.05 * len(y)))
    clf = DecisionTreeClassifier(max_depth=4, min_samples_leaf=min_n)
    clf.fit(x, y_c)
    y_p = clf.predict(x)

    # Export the tree; the ``with`` block closes the file on exit.
    fname = "D:\\Cache\\tree.txt"
    with open(fname, 'w') as f:
        tree.export_graphviz(clf, out_file=f)
    factor_exchange(factor_name, fname)

    # Publish the fitted tree structure through module-level globals.
    left = clf.tree_.children_left
    right = clf.tree_.children_right
    feature = clf.tree_.feature
    threshold = clf.tree_.threshold
    disp_tree()

    # NOTE(review): the caller's ``test_y`` is discarded and ``test_x`` is
    # never used, so every statistic below is computed in-sample (the
    # predictions ``y_p`` come from ``x``). Kept as in the original; confirm
    # whether out-of-sample evaluation was intended.
    test_y = y

    predicted_up = y_p > 0
    predicted_down = y_p < 0
    n_up = np.sum(predicted_up)

    # Baseline statistics over all samples.
    print("mean income is:", str(np.average(test_y)),
          "\nwin ratio is: ", str(np.sum(test_y > 0) / len(test_y)))

    # Statistics conditioned on the predicted class. If a class is never
    # predicted, the corresponding mean/ratio comes out as NaN.
    # NOTE(review): the "class_0" label is printed for samples predicted as
    # class -1 (``y_p < 0``); confirm which of the two was intended.
    print("after training\n"
          "mean class_1 is: ", str(np.average(test_y[predicted_up])),
          "\nwin ratio is: ", str(np.sum(test_y[predicted_up] > 0) / n_up),
          "\ntotal class_1 is:", str(n_up),
          "\nmean class_0 is: ", str(np.average(test_y[predicted_down])))
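# (Web-page residue, not part of the program: "comment list")
# (Web-page residue, not part of the program: "article table of contents")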