def addToSentenceScore(question, sentence):
    """Score how well ``sentence`` matches ``question`` and record the result.

    Both strings are split on whitespace, every token has its ``?``
    characters removed and is passed through ``morpher.stem``. The stems
    shared by both sides are re-tokenized and POS-tagged with NLTK; each
    tagged token whose tag contains ``'VB'`` adds 6 points, every other
    tagged token adds 3 points.

    Side effect: ``sentenceScore[sentence]`` is set to that point total plus
    ten times the Jaccard similarity of the two stem sets.

    Args:
        question: The question text.
        sentence: The candidate sentence text.

    Returns:
        The point total from the shared tokens only (the Jaccard bonus is
        stored in ``sentenceScore`` but is not part of the return value).
    """
    # NOTE(review): `morpher` is presumably a stemmer object defined at module
    # level -- only its `.stem(str)` method is relied on here.
    questionSet = {morpher.stem(item.replace("?", "")) for item in question.split()}
    sentenceSet = {morpher.stem(item.replace("?", "")) for item in sentence.split()}

    shared = questionSet & sentenceSet
    combined = questionSet | sentenceSet
    # Guard the division: when neither input yields a token the union is
    # empty, which previously raised ZeroDivisionError. Two empty sets are
    # treated as having no similarity.
    jaccard = float(len(shared)) / float(len(combined)) if combined else 0.0

    # Tag the shared stems; an empty string simply yields no tagged tokens.
    common = ' '.join(shared)
    tagCommon = nltk.pos_tag(nltk.word_tokenize(common))
    score = sum(6 if 'VB' in tag else 3 for _, tag in tagCommon)

    # Add sentence and score to a hashmap
    sentenceScore[sentence] = score + (jaccard * 10)
    return score
# PARSER TO TOKENIZE, REMOVE STOP WORDS, MORPHOLOGY, ADD TO SET
qa.py 文件源码
python
阅读 17
收藏 0
点赞 0
评论 0
评论列表
文章目录