def train_clf(self, trainfiles):
    """Fit a classifier on every document in *trainfiles*; store it on ``self.clf``.

    Each training file is read through ``yield_tokens_labels``, which yields
    one ``(tokens, labels)`` pair per document. Features are built per
    document with ``self.make_featmat_rep`` and then stacked vertically into
    a single matrix that is passed to ``fit`` together with the flattened
    label list.

    Args:
        trainfiles: iterable of training-file identifiers accepted by
            ``yield_tokens_labels`` (presumably paths -- confirm with that
            helper).

    Raises:
        ValueError: if no document was found in any training file, so there
            is nothing to fit on.
    """
    # tokens: list of words, labels: list of corresponding labels
    # go document by document because of local context
    final_labels = []
    doc_feats = []
    for trainfile in trainfiles:
        for tokens, labels in yield_tokens_labels(trainfile):
            final_labels.extend(labels)
            # NOTE(review): assumes make_featmat_rep returns one feature row
            # per token, so stacked rows line up with final_labels -- confirm.
            doc_feats.append(self.make_featmat_rep(tokens))

    # np.vstack on an empty list fails with an unrelated-looking
    # "need at least one array" error; report the real cause instead.
    if not doc_feats:
        raise ValueError(
            "no training data: trainfiles yielded no documents"
        )

    featmat = np.vstack(doc_feats)
    print("training classifier")
    # NOTE(review): logreg is presumably sklearn's LogisticRegression imported
    # under an alias -- confirm at the import site. The fixed random_state
    # keeps training runs reproducible.
    clf = logreg(class_weight='balanced', random_state=1)
    clf.fit(featmat, final_labels)
    self.clf = clf
评论列表
文章目录