def get_predict_data(self, preprocessed, n_jobs=-1):
    """Build the feature matrix and gather the subsequence info.

    The incoming iterable is duplicated with ``tee``: one copy is
    consumed right away to collect the info dicts, the other one is
    passed on to ``vectorize``.

    Side effect: ``self.vectorizer`` is replaced by a freshly built
    ``SeqVectorizer`` (when ``self.mode == 'sequence'``) or
    ``GraphVectorizer`` (any other mode), configured with
    ``auto_weights=True`` plus ``self.vectorizer_args``.

    Parameters
    ----------
    preprocessed : iterable
        In 'sequence' mode every item is unpacked as a pair whose first
        element is a mapping of attributes. Otherwise every item must
        expose ``item.graph['id']``, which is treated as a mapping.
    n_jobs : int, optional
        Forwarded unchanged to ``vectorize`` (default -1).

    Returns
    -------
    tuple
        ``(matrix, info)`` where ``matrix`` is whatever ``vectorize``
        returns and ``info`` is a list with one dict per input item,
        restricted to the keys 'tr_name', 'center' and 'tr_len'.
    """
    wanted_keys = ('tr_name', 'center', 'tr_len')

    def _subdict(dic):
        # Keep only the wanted keys that are actually present in `dic`.
        return {key: dic[key] for key in wanted_keys if key in dic}

    # One copy feeds the vectorizer, the other is drained for the info.
    for_vectorizer, for_info = tee(preprocessed)
    sequence_mode = self.mode == 'sequence'

    if sequence_mode:
        attributes = (attr for attr, _ in for_info)
    else:
        attributes = (graph.graph['id'] for graph in for_info)
    info = list(map(_subdict, attributes))

    # A new vectorizer is created on every call, after the info pass.
    vectorizer_cls = SeqVectorizer if sequence_mode else GraphVectorizer
    self.vectorizer = vectorizer_cls(auto_weights=True,
                                     **self.vectorizer_args)

    matrix = vectorize(for_vectorizer,
                       vectorizer=self.vectorizer,
                       block_size=400,
                       n_jobs=n_jobs)
    return matrix, info
评论列表
文章目录