def get_best_split(X, y, criterion):
    """Find the split of the data set (X, y) with the lowest weighted impurity.

    Every distinct value of every feature column is tried as a split point.
    Candidates are scored by ``weighted_impurity`` and the first candidate
    with the strictly lowest score is kept.

    Args:
        X (numpy.ndarray): 2-D feature matrix; columns are features.
        y (numpy.ndarray): targets, forwarded unchanged to ``split_node``.
        criterion: impurity measure forwarded to ``weighted_impurity``
            (documented by the original author as gini or entropy).

    Returns:
        dict: ``{'index': feature index, 'value': split value,
        'children': groups returned by split_node for that split}``.
        All three entries are ``None`` when no candidate split exists
        (e.g. X has no rows) or no candidate yields a comparable score.
    """
    best_index, best_value, children = None, None, None
    # Start from +inf rather than 1: an impurity such as entropy can be equal
    # to or greater than 1, in which case a threshold of 1 would reject every
    # candidate and leave the result empty.
    best_score = float('inf')

    for index in range(X.shape[1]):
        # np.unique already returns the distinct values in sorted order.
        for value in np.unique(X[:, index]):
            groups = split_node(X, y, index, value)
            # Element [1] of each group is what gets scored -- presumably the
            # labels of that child; confirm against split_node.
            impurity = weighted_impurity(
                [groups[0][1], groups[1][1]], criterion
            )
            if impurity < best_score:
                best_index, best_value = index, value
                best_score, children = impurity, groups

    return {'index': best_index, 'value': best_value, 'children': children}
1decision_tree_submit.py 文件源码
python
阅读 33
收藏 0
点赞 0
评论 0
评论列表
文章目录