coreMF.py 文件源码

python
阅读 17 收藏 0 点赞 0 评论 0

项目:nlpcc2016 作者: huangxiangzhou 项目源码 文件源码
def calScorePreAll(self, countCharDict, qWithoutSubSet, stemmingDict):
    """Score the overlap between ``self.pre`` and the question word set.

    ``self.pre`` is split on spaces and dots; each resulting token that is a
    key of ``stemmingDict`` is replaced by ``stemmingDict[token][0][0]``.
    The score is then computed as follows:

    * If the token set shares words with ``qWithoutSubSet``, every shared
      word contributes ``1 / (countCharDict[word] + 1)`` when it is a key of
      ``countCharDict`` and ``1`` otherwise.
    * If no word is shared, the contribution is the best
      ``Levenshtein.ratio`` between any question word and any token that
      start with the same character, divided by
      ``countCharDict[question_word] + 1`` when the question word is a key
      of ``countCharDict``.

    The contribution is finally divided by the size of the union of both
    word sets.

    Args:
        countCharDict: Mapping from word to a number used to damp the
            word's weight (presumably an occurrence count; confirm with
            the caller).
        qWithoutSubSet: Set of question words to compare against.
        stemmingDict: Mapping from token to a nested sequence whose
            ``[0][0]`` element replaces the token before comparison.

    Returns:
        The score, which is also stored in ``self.scorePreAll``. ``0`` is
        returned when ``self.pre`` is empty.
    """
    pre = self.pre

    # An empty predicate always scores 0, so skip tokenising and matching.
    if not pre:
        self.scorePreAll = 0
        return 0

    # NOTE: consecutive/leading/trailing separators yield '' tokens; they
    # are kept on purpose because they take part in the union size below.
    preWordSet = {
        stemmingDict[token][0][0] if token in stemmingDict else token
        for token in set(re.split(r' |\.', pre))
    }

    sharedWords = qWithoutSubSet & preWordSet

    # Float arithmetic throughout so the score never depends on how `/`
    # treats two integers.
    preFactor = 0.0
    if sharedWords:
        for word in sharedWords:
            if word in countCharDict:
                preFactor += 1.0 / (countCharDict[word] + 1)
            else:
                preFactor += 1.0
    else:
        # Fuzzy fallback: only this branch needs the similarity function.
        similarity = Levenshtein.ratio
        for qWord in qWithoutSubSet:
            if not qWord:
                continue
            # The damping divisor depends on the question word only.
            divisor = countCharDict[qWord] + 1 if qWord in countCharDict else 1
            for preWord in preWordSet:
                # Only compare words that start with the same character.
                if not preWord or qWord[0] != preWord[0]:
                    continue
                candidate = similarity(qWord, preWord) / divisor
                if preFactor < candidate:
                    preFactor = candidate

    scorePre = preFactor / len(qWithoutSubSet | preWordSet)
    self.scorePreAll = scorePre
    return scorePre
评论列表
文章目录


问题


面经


文章

微信
公众号

扫码关注公众号