qa.py 文件源码-python代码片段

qa.py 文件源码

python

阅读 20 收藏 0 点赞 0 评论 0

项目：NLP_question_answering_system_project 作者: Roshrini 项目源码文件源码

def addToSentenceScore(question, sentence):
    """Score how well ``sentence`` matches ``question`` and record the result.

    Both strings are split on whitespace, question marks are removed from
    each token, and the token is stemmed with the module-level ``morpher``.
    Two measures are then derived from the resulting stem sets:

    * a part-of-speech score: every stem shared by both sets adds 6 when its
      tag (from ``nltk.pos_tag``) contains ``'VB'``, otherwise 3;
    * the Jaccard similarity of the two stem sets (0 when both are empty).

    Side effect: ``sentenceScore[sentence]`` (a module-level mapping) is set
    to ``pos_score + jaccard * 10``.

    Args:
        question: The question text.
        sentence: The candidate sentence text.

    Returns:
        The part-of-speech score only; the Jaccard contribution is included
        in the value stored in ``sentenceScore`` but not in the return value.
    """
    questionSet = {
        morpher.stem(item.replace("?", "")) for item in question.split()
    }
    sentenceSet = {
        morpher.stem(item.replace("?", "")) for item in sentence.split()
    }

    shared = questionSet & sentenceSet
    combined = questionSet | sentenceSet

    # When both inputs yield no tokens the union is empty; treat that as zero
    # similarity instead of raising ZeroDivisionError.
    if combined:
        jaccard = float(len(shared)) / float(len(combined))
    else:
        jaccard = 0.0

    # Set iteration order is not stable between runs, and the tagger sees the
    # stems as one token sequence, so sort them to keep the tagging (and thus
    # the score) reproducible for identical input.
    common = ' '.join(sorted(shared))
    tagCommon = nltk.pos_tag(nltk.word_tokenize(common))

    score = 0
    for _word, tag in tagCommon:
        # Shared verbs weigh twice as much as any other shared word.
        score += 6 if 'VB' in tag else 3

    # Add sentence and its combined score to the module-level mapping.
    sentenceScore[sentence] = score + (jaccard * 10)
    return score

# PARSER TO TOKENIZE, REMOVE STOP WORDS, MORPHOLOGY, ADD TO SET