def get_train_data(corpus, *, count=None, **kwargs):
    """Build per-document features and labels from a corpus and split them.

    For every document yielded by ``corpus.iter_documents()`` the raw text
    and raw sentences are read, labels are derived with
    ``text2labels(text, sents)`` and features with ``sent2features(text)``.
    Documents that raise during this processing are skipped (and logged)
    so that one bad document does not abort the whole run.

    Args:
        corpus: Object exposing ``iter_documents()``; each yielded document
            must provide ``raw()`` and ``raw_sents()``.
        count: Optional maximum number of documents to consume. A falsy
            value (``None`` or ``0``) means "no limit".
        **kwargs: Forwarded unchanged to ``train_test_split``
            (presumably scikit-learn's; confirm against the file imports).

    Returns:
        Whatever ``train_test_split(X, y, **kwargs)`` returns, where ``X``
        is the list of per-document features and ``y`` the parallel list of
        per-document labels.
    """
    # Local import keeps this block self-contained regardless of what the
    # module already imports at the top of the file.
    import logging

    documents = corpus.iter_documents()
    if count:
        # Limit lazily so the corpus is never fully materialized.
        documents = islice(documents, count)

    X = []
    y = []
    for document in tqdm(documents):
        try:
            text = document.raw()
            sents = document.raw_sents()
            labels = text2labels(text, sents)
            features = sent2features(text)
        except Exception:
            # Best-effort: skip the broken document, but leave a trace
            # instead of hiding the failure completely.
            logging.getLogger(__name__).warning(
                "Skipping document that could not be processed",
                exc_info=True,
            )
            continue
        # Append only after both values exist so X and y stay aligned.
        X.append(features)
        y.append(labels)
    return train_test_split(X, y, **kwargs)
评论列表
文章目录