def _find_constrained_bicluster(self, data):
    """Find a k x l bicluster by alternating row and column selection.

    The bicluster size is drawn at random: ``k`` uniformly from
    ``[1, ceil(num_rows / 2)]`` and ``l`` uniformly from
    ``[1, ceil(num_cols / 2)]``. Starting from ``l`` random columns, the
    search repeatedly

    1. keeps the ``k`` rows with the largest sums over the current columns,
    2. keeps the ``l`` columns with the largest sums over those rows,

    until the mean of the selected submatrix changes by no more than
    ``self.tol`` between two consecutive passes.

    Args:
        data: Two-dimensional array (its ``shape`` must unpack into rows
            and columns, and it is indexed with NumPy fancy indexing).

    Returns:
        ``Bicluster(rows, cols)`` built from the final row and column
        index arrays. The indices are not sorted.

    Raises:
        ValueError: If ``data`` has no rows or no columns.
    """
    num_rows, num_cols = data.shape
    if num_rows == 0 or num_cols == 0:
        # random.randint(1, 0) would raise an opaque "empty range" ValueError;
        # fail with the same exception type but a message naming the cause.
        raise ValueError("cannot search for a bicluster in an empty data matrix")

    k = random.randint(1, math.ceil(num_rows / 2))
    num_selected_cols = random.randint(1, math.ceil(num_cols / 2))
    cols = np.random.choice(num_cols, size=num_selected_cols, replace=False)

    # Start from -inf so the first convergence test can never succeed: the
    # first real comparison is then between two computed means, not between
    # a computed mean and an arbitrary placeholder such as 0.0.
    avg = float('-inf')
    while True:
        old_avg = avg

        # argpartition leaves the largest entries in the trailing positions
        # (in arbitrary order); this is usually faster than a full
        # np.argsort(...)[-k:].
        row_sums = np.sum(data[:, cols], axis=1)
        rows = bn.argpartition(row_sums, num_rows - k)[-k:]

        col_sums = np.sum(data[rows, :], axis=0)
        cols = bn.argpartition(
            col_sums, num_cols - num_selected_cols
        )[-num_selected_cols:]

        avg = np.mean(data[np.ix_(rows, cols)])

        # Written as "not (diff > tol)" rather than "diff <= tol" so that a
        # NaN difference (e.g. NaN in the data) ends the loop instead of
        # spinning forever.
        if not abs(avg - old_avg) > self.tol:
            break

    return Bicluster(rows, cols)
评论列表
文章目录