def search_query(self, query):
    """
    Return the documents most related to a free-text query.

    The query is split on whitespace and every word is looked up in
    ``self.tokens_mapping``. The rows of ``self.tokens_representation``
    selected by the resulting token ids are averaged into one query vector,
    which is compared by cosine similarity against
    ``self.documents_representation``. The best-scoring document indices
    are translated through ``self.documents_mapping``.

    Background: http://webhome.cs.uvic.ca/~thomo/svd.pdf

    Args:
        query: Query text; words are separated by whitespace.

    Returns:
        A list of at most five entries of ``self.documents_mapping``,
        ordered from most to least similar. An empty list is returned when
        none of the query words is present in ``self.tokens_mapping``.
    """
    def topN(similarities, N=5):
        # Indices of the N highest scores, best match first.
        return np.argsort(similarities)[::-1][:N]

    # split() without an argument ignores repeated/leading/trailing
    # whitespace, so no empty-string "words" are looked up.
    tokens_ids = []
    for word in query.split():
        try:
            token_id = self.tokens_mapping[word]
        except KeyError:
            print("Token not found in tokens mapping dict")
        else:
            tokens_ids.append(token_id)

    # Without any known token there is nothing to average: np.mean over an
    # empty selection would produce NaN and break the similarity step.
    if not tokens_ids:
        return []

    query_representation = np.mean(
        self.tokens_representation[tokens_ids, :], axis=0
    )
    # cosine_similarity expects 2-D input (n_samples, n_features); present
    # the averaged vector as a single-row matrix.
    query_representation = np.asarray(query_representation).reshape(1, -1)
    similarities = cosine_similarity(
        query_representation, self.documents_representation
    )
    # similarities has one row (the query); rank the documents along it.
    return [self.documents_mapping[index] for index in topN(similarities[0])]
评论列表
文章目录