def build_tree(self, dataX, dataY):
    """Recursively build a random decision tree over (dataX, dataY).

    A split feature is chosen uniformly at random, and the split value is
    the mean of that feature taken from two randomly sampled rows.

    Parameters:
        dataX: feature array; indexed as dataX[row, column] when 2-D.
        dataY: target values, one per row of dataX (assumed 1-D --
            TODO confirm against callers).

    Returns:
        * an empty array when dataX has no rows;
        * a 1-D leaf ``[-1, value, -1, -1]`` when dataX is 1-D, when the
          row count is <= ``self.leaf_size``, or when no two sampled rows
          differ on the chosen feature;
        * otherwise a 2-D array whose first row is
          ``[feature, split_value, 1, right_start]`` followed by the rows
          of the left subtree and then of the right subtree.
          (``1`` and ``right_start`` are presumably row offsets relative
          to the node -- verify against the query code.)
    """
    # Single pre-formatted argument so the output is the same on Python 2 and 3.
    if self.verbose:
        print("build_tree %s" % (self.leaf_size,))
    if self.verbose:
        print("data shape %s" % (dataX.shape,))

    # No elements in this subtree: return an empty subtree.
    if dataX.shape[0] == 0:
        return np.array([])

    # Too few rows to split (or 1-D input): emit a leaf holding the mode.
    if dataX.ndim == 1 or dataX.shape[0] <= self.leaf_size:
        # np.unique returns sorted values, so argmax picks the smallest of
        # the most frequent values -- the same tie-break as scipy.stats.mode,
        # without depending on the shape of its result across SciPy versions.
        values, counts = np.unique(dataY, return_counts=True)
        return np.array([-1, values[np.argmax(counts)], -1, -1])

    if self.verbose:
        print("passed conditions")

    n_rows = dataX.shape[0]
    # randint's upper bound is exclusive, so this covers every column,
    # including the last one, and works when there is only one column.
    i = np.random.randint(dataX.shape[1])
    d = np.random.randint(n_rows, size=2)

    # Look for two rows that differ on feature i, re-sampling the second
    # row a bounded number of times.  The for/else only gives up when every
    # attempt failed; a pair found on the last attempt is still used.
    max_tries = 11
    for _ in range(max_tries):
        if dataX[d[0], i] != dataX[d[1], i]:
            break
        d[1] = np.random.randint(n_rows)
    else:
        return np.array([-1, dataY[d[0]], -1, -1])

    splitVal = (dataX[d[0], i] + dataX[d[1], i]) / 2.0

    go_left = dataX[:, i] <= splitVal
    leftTree = self.build_tree(dataX[go_left, :], dataY[go_left])
    go_right = dataX[:, i] > splitVal
    rightTree = self.build_tree(dataX[go_right, :], dataY[go_right])

    left_missing = leftTree.shape[0] == 0
    right_missing = rightTree.shape[0] == 0
    if left_missing or right_missing:
        # Only one subtree exists; it sits directly after the root row.
        leftTreeSize = 0
    elif leftTree.ndim == 1:
        # A leaf is a flat 4-vector and occupies a single row.
        leftTreeSize = 1
    else:
        leftTreeSize = leftTree.shape[0]

    root = [i, splitVal, 1, leftTreeSize + 1]
    if not left_missing:
        root = np.vstack((root, leftTree))
    if not right_missing:
        root = np.vstack((root, rightTree))
    return np.array(root)
评论列表
文章目录