def test_main(self):
    """Smoke-test the term-document-matrix -> TF-IDF -> classifier pipeline.

    Builds a term-document matrix from the categorized documents, fits a
    passive-aggressive classifier on its L1-normalized TF-IDF features, and
    then featurizes one unseen sentence and runs it through both ``predict``
    and ``decision_function``.

    The test makes no assertions on the outputs; it passes as long as the
    whole pipeline runs without raising.
    """

    def clean_function(text):
        # Blank out any text that starts with '['; pass everything else through.
        return '' if text.startswith('[') else text

    categories, documents = get_docs_categories()
    entity_types = {'GPE'}

    term_doc_mat = TermDocMatrixFactory(
        category_text_iter=zip(categories, documents),
        clean_function=clean_function,
        nlp=_testing_nlp,
        feats_from_spacy_doc=FeatsFromSpacyDoc(entity_types_to_censor=entity_types)
    ).build()

    # NOTE(review): `n_iter` was superseded by `max_iter` in later scikit-learn
    # releases -- confirm against the version this project pins before changing.
    clf = PassiveAggressiveClassifier(n_iter=5, C=0.5, n_jobs=-1, random_state=0)

    # The featurizer reuses the matrix's term index store, and is configured
    # with the same cleaning function and entity censoring as the factory.
    fdc = FeatsFromDoc(
        term_doc_mat._term_idx_store,
        clean_function=clean_function,
        feats_from_spacy_doc=FeatsFromSpacyDoc(entity_types_to_censor=entity_types)
    ).set_nlp(_testing_nlp)

    tfidf = TfidfTransformer(norm='l1')
    X = tfidf.fit_transform(term_doc_mat._X)
    clf.fit(X, term_doc_mat._y)

    X_to_predict = fdc.feats_from_doc('Did sometimes march UNKNOWNWORD')
    # The classifier was fitted on TF-IDF features, so the query must go
    # through the same fitted transformer before *either* inference call.
    # Previously decision_function was given the raw, untransformed features.
    X_to_predict_tfidf = tfidf.transform(X_to_predict)
    pred = clf.predict(X_to_predict_tfidf)
    dec = clf.decision_function(X_to_predict_tfidf)
test_termDocMatrixFactory.py 文件源码
python
阅读 30
收藏 0
点赞 0
评论 0
评论列表
文章目录