def get_supervised_data(self, preprocessed, bin_sites,
                        active_learning=False, random_state=1234,
                        n_jobs=-1):
    """Build the feature matrix and the matching regression targets.

    Each item's ``dist`` entry is turned into a target value with
    ``common.dist_to_val(dist, self.max_dist)``; the same items are then
    passed to ``vectorize`` to obtain the feature matrix.

    Side effect: ``self.vectorizer`` is replaced by a new vectorizer
    built with ``auto_weights=True`` and ``self.vectorizer_args``.

    Args:
        preprocessed: Iterable of preprocessed items. When ``self.mode``
            is ``'sequence'`` every item is unpacked as a pair whose
            first element is a mapping with a ``'dist'`` key; in any
            other mode every item must expose ``item.graph['id']['dist']``.
        bin_sites: Not used by this method.
        active_learning: Not used by this method.
        random_state: Not used by this method.
        n_jobs: Forwarded unchanged to ``vectorize``.

    Returns:
        A ``(matrix, vals)`` tuple: whatever ``vectorize`` returns, and
        a NumPy array holding one target value per input item.
    """
    # The input is walked twice (targets, then features), so split it
    # into two independent iterators instead of assuming it is a list.
    feature_stream, target_stream = tee(preprocessed)
    sequence_mode = self.mode == 'sequence'

    # The target branch is drained completely before vectorisation.
    if sequence_mode:
        distances = [info['dist'] for info, _ in target_stream]
    else:
        distances = [graph.graph['id']['dist'] for graph in target_stream]
    targets = np.array(
        [common.dist_to_val(distance, self.max_dist)
         for distance in distances])

    # Only the class matching the current mode is looked up.
    vectorizer_cls = SeqVectorizer if sequence_mode else GraphVectorizer
    self.vectorizer = vectorizer_cls(auto_weights=True,
                                     **self.vectorizer_args)

    features = vectorize(feature_stream, vectorizer=self.vectorizer,
                         block_size=400, n_jobs=n_jobs)
    return features, targets
评论列表
文章目录