def bm25(p, titles, answers, scores):
    """Rank candidate entries against the query ``p`` and return them as lists.

    The query, every title and every answer are passed through
    ``remove_punctuation_re``. The query and the titles are then segmented
    with ``jieba.cut`` and re-joined with single spaces. The segmented titles
    are handed to ``init`` to build the index structures, the module-level
    ``avglen`` is recomputed, and ``search`` produces the ranking.

    Args:
        p: Query string.
        titles: Sequence of title strings.
        answers: Sequence of answer strings, zipped position-by-position
            with ``titles``.
        scores: Sequence of scores, zipped position-by-position with
            ``titles``.

    Returns:
        A ``(titles, answers, scores)`` tuple of three lists, filled from
        elements 0, 1 and 2 of each key in the pairs returned by ``search``,
        in the order ``search`` returns them.

    Side effects:
        Rebinds the module-level global ``avglen``.
    """
    global avglen

    # Keep the untouched titles: ``titles`` is rebound to cleaned/segmented
    # text below.
    original_titles = copy.deepcopy(titles)

    titles = [remove_punctuation_re(title) for title in titles]
    answers = [remove_punctuation_re(answer) for answer in answers]
    p = remove_punctuation_re(p)

    # Segment into space-separated tokens.
    titles = [' '.join(jieba.cut(title)) for title in titles]
    p = ' '.join(jieba.cut(p))

    wordindoc, wordindata, doclen, sumlen = init(titles, False)

    # ``N`` is a module-level name that is not assigned in this function.
    # NOTE(review): presumably the document count maintained by ``init`` --
    # confirm; a value of 0 raises ZeroDivisionError here.
    avglen = 1.0 * sumlen / N

    # zip() is a one-shot iterator on Python 3; materialise it so ``search``
    # can index or re-scan the candidates regardless of interpreter version.
    candidates = list(zip(titles, original_titles, answers, scores))
    res = search(p, candidates, wordindoc, wordindata, doclen)

    # NOTE(review): candidates are 4-tuples (segmented title, original title,
    # answer, score) but only key[0..2] are read back. Confirm that the keys
    # returned by ``search`` are already (title, answer, score); otherwise
    # these indices are off by one.
    titles, answers, scores = [], [], []
    for key, _ in res:
        titles.append(key[0])
        answers.append(key[1])
        scores.append(key[2])
    return titles, answers, scores
# 评论列表  (web-page residue: "comment list")
# 文章目录  (web-page residue: "table of contents")