def test_miislita_high_level(self):
# construct corpus from file
miislita = CorpusMiislita(datapath('miIslita.cor'))
# initialize tfidf transformation and similarity index
tfidf = models.TfidfModel(miislita, miislita.dictionary, normalize=False)
index = similarities.SparseMatrixSimilarity(tfidf[miislita], num_features=len(miislita.dictionary))
# compare to query
query = 'latent semantic indexing'
vec_bow = miislita.dictionary.doc2bow(query.lower().split())
vec_tfidf = tfidf[vec_bow]
# perform a similarity query against the corpus
sims_tfidf = index[vec_tfidf]
# for the expected results see the article
expected = [0.0, 0.2560, 0.7022, 0.1524, 0.3334]
for i, value in enumerate(expected):
self.assertAlmostEqual(sims_tfidf[i], value, 2)
评论列表
文章目录