def _build(self, tree, examples_idx, features_idx, depth=0):
    """Recursively build the subtree for the selected examples.

    The labels ``self.y[examples_idx]`` are summarised with ``unique``.
    A class node from ``self._class_node`` is returned instead of a split
    when any of the following holds:

    * ``features_idx`` is empty,
    * only one distinct label is present,
    * fewer than ``self.min_samples_split`` examples are selected,
    * ``depth`` has reached ``self.max_depth``,
    * ``self.splitter.calc`` returns ``None``, or the ``info`` of its
      record is below ``self.min_entropy_decrease``.

    Otherwise a feature ``Node`` is created and one child is attached for
    every record produced by ``self.splitter.split``.

    Args:
        tree: Not read by this method; only forwarded to recursive calls.
        examples_idx: Array used to index ``self.y``; selects the examples
            handled by this subtree.
        features_idx: Array of candidate feature indices for splitting.
        depth: Depth of the node being built (0 by default).

    Returns:
        The class node produced by ``self._class_node`` or a feature
        ``Node`` with its children attached.
    """
    labels, label_counts = unique(self.y[examples_idx])

    def make_leaf():
        # Every leaf created at this level describes the same label
        # distribution: the one of the examples that reached this node.
        return self._class_node(labels, label_counts)

    # Stopping rules that do not require evaluating a split.
    if (features_idx.size == 0
            or labels.size == 1
            or examples_idx.size < self.min_samples_split
            or depth >= self.max_depth):
        return make_leaf()

    best = self.splitter.calc(examples_idx, features_idx)
    # NOTE(review): ``info`` is presumably the entropy decrease of the
    # best split -- confirm against the splitter implementation.
    if best is None or best.info < self.min_entropy_decrease:
        return make_leaf()

    partitions = self.splitter.split(examples_idx, best)

    # Keep only the features flagged in ``alive_features`` for the
    # children.
    remaining = np.compress(best.alive_features, features_idx)
    if not self.is_repeating:
        # The feature used here must not be offered again further down.
        used_at = np.where(remaining == best.feature_idx)
        remaining = np.delete(remaining, used_at)

    root = Node(best.feature_idx,
                is_feature=True,
                details=best,
                item_count=(labels, label_counts))
    for part in partitions:
        if part.size == 0:
            # Empty branch: fall back to this node's label distribution.
            child = make_leaf()
        else:
            child = self._build(tree, part.bag, remaining, depth + 1)
        root.add_child(child, part)
    return root
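# Web-page residue from the source article (not code): "Comment list"
# Web-page residue from the source article (not code): "Article table of contents"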