def test_read_files(self):
    """Check concept counting when the documents are supplied as file names.

    Each document is written to its own temporary file and the list of file
    names is fed to a ``CountVectorizer`` configured with ``input='filename'``
    and the ``ConceptAnalyzer.analyze`` callable as analyzer.

    The expected matrix has one row per document and one column per
    vocabulary index: column 0 is concept ``'13542-4'`` (prefLabel ``dolor``,
    altLabel ``amet``) and column 1 is concept ``'13542-1'`` (prefLabel
    ``ipsum``). The second document is expected to yield 2 for column 0,
    i.e. the test expects ``Dolor`` and ``AMET`` to both count towards that
    concept.
    """
    # Local import: the module's import block is not part of this excerpt.
    import os

    docs = [
        'Lorem ipsum',
        'Lorem Lorem ipsum Dolor sit AMET',
        'consectetur adipisici elit',
    ]
    thesaurus = {
        '13542-1': {
            'prefLabel': ['ipsum'],
            'broader': ['0b'],
            'related': ['0r'],
            'narrower': ['0n'],
            'altLabel': [],
        },
        '13542-4': {
            'prefLabel': ['dolor'],
            'broader': ['1b'],
            'related': ['1r'],
            'narrower': ['1n'],
            'altLabel': ['amet'],
        },
    }
    vocabulary = {'13542-1': 1, '13542-4': 0}

    fnames = []
    try:
        for doc in docs:
            # delete=False keeps the file on disk after it is closed so it
            # can be reopened by name; the ``with`` block guarantees the
            # content is flushed and the handle released before it is read.
            with NamedTemporaryFile(mode='w', delete=False) as tmp:
                # Record the name first so cleanup still happens if the
                # write below fails.
                fnames.append(tmp.name)
                print(doc, file=tmp)

        cf = ConceptAnalyzer(thesaurus, input='filename')
        counter = CountVectorizer(analyzer=cf.analyze, vocabulary=vocabulary, input='filename')
        res = counter.fit_transform(fnames).todense()
        np.testing.assert_array_almost_equal(res, [[0, 1], [2, 1], [0, 0]])
    finally:
        # The files were created with delete=False, so remove them here to
        # avoid leaking temp files on every test run.
        for fname in fnames:
            os.remove(fname)
test_concept_count_vectorizer.py 文件源码
python
阅读 21
收藏 0
点赞 0
评论 0
评论列表
文章目录