def getTextConfidence(self, text):
    """Return the best similarity score between ``text`` and the known phrases.

    The strategy is selected by ``self.typeOfSim``:

    * ``'jaccard'``: score ``text`` against every entry of
      ``self.know_words`` with ``jaccard_compare`` and return the highest
      score.
    * ``'gensim'``: build a 2-topic LSI model over
      ``self.know_words_remove_stopwords`` and return the highest
      similarity between ``text`` (lower-cased and whitespace-split) and
      the indexed documents.

    Args:
        text: The input string to score.

    Returns:
        The highest score found; ``0`` when there are no known phrases to
        compare against; ``None`` when ``self.typeOfSim`` is neither
        ``'jaccard'`` nor ``'gensim'``.

    Raises:
        ImportError: In ``'gensim'`` mode when gensim cannot be imported.
    """
    if self.typeOfSim == 'jaccard':
        # Nothing to compare against: report zero confidence.
        if not self.know_words:
            return 0
        return max(jaccard_compare(text, known) for known in self.know_words)

    if self.typeOfSim == 'gensim':
        # Same "nothing to compare against" contract as the jaccard branch;
        # without this guard an empty corpus cannot yield a best match.
        if not self.know_words_remove_stopwords:
            return 0

        try:
            from gensim import corpora, models, similarities
        except ImportError as e:
            # Fail with the real cause instead of printing it and then
            # crashing on the first use of an unbound name.
            raise ImportError(
                "gensim is required when typeOfSim == 'gensim': %s" % e
            )

        # NOTE(review): assumes know_words_remove_stopwords is a list of
        # token lists, as corpora.Dictionary / doc2bow expect; confirm
        # against wherever the attribute is built.
        documents = self.know_words_remove_stopwords
        dictionary = corpora.Dictionary(documents)
        # The loop variable must not be called `text`: on Python 2 a list
        # comprehension variable leaks and would overwrite the parameter.
        corpus = [dictionary.doc2bow(doc) for doc in documents]
        lsi = models.LsiModel(corpus, id2word=dictionary, num_topics=2)

        query_bow = dictionary.doc2bow(text.lower().split())
        index = similarities.MatrixSimilarity(lsi[corpus])
        scores = index[lsi[query_bow]]
        # Only the best score is needed, so take the maximum directly
        # rather than sorting every (index, score) pair.
        return max(scores)

    # Unrecognised similarity type: keep the historical "no result" value.
    return None
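# Web-page residue from the source this snippet was copied from (not code): "comment list"
# Web-page residue from the source this snippet was copied from (not code): "article table of contents"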