def calScorePreAll(self, countCharDict, qWithoutSubSet, stemmingDict):
    """Score the word overlap between ``self.pre`` and the question words.

    ``self.pre`` (presumably the candidate's predicate string -- confirm
    against the class) is split on spaces and dots; each token found in
    ``stemmingDict`` is replaced by ``stemmingDict[token][0][0]``.

    * If the resulting token set shares words with ``qWithoutSubSet``, every
      shared word contributes ``1 / (countCharDict[word] + 1)`` when it has
      an entry in ``countCharDict`` and ``1`` otherwise.
    * If nothing is shared, the score falls back to the single best
      ``Levenshtein.ratio`` between a question word and a token that start
      with the same character, divided by the same count-based weight of
      the question word.

    The accumulated factor is normalised by the size of the union of both
    word sets.  The result is stored in ``self.scorePreAll`` and returned.

    Args:
        countCharDict: Mapping from word to a numeric count; a larger count
            lowers the contribution of that word.
        qWithoutSubSet: Set of question words (must be a real ``set``, it is
            combined with ``&`` and ``|``).
        stemmingDict: Mapping from token to a nested sequence whose
            ``[0][0]`` element is the replacement token.

    Returns:
        The score; ``0`` when ``self.pre`` is empty.
    """
    pre = self.pre
    if not pre:
        # An empty predicate always scores 0, so skip tokenising/matching.
        self.scorePreAll = 0
        return 0

    # Tokenise, then normalise each token through the stemming table.
    preWords = {
        stemmingDict[token][0][0] if token in stemmingDict else token
        for token in re.split(r' |\.', pre)
    }

    sharedWords = qWithoutSubSet & preWords
    if sharedWords:
        # Exact matches: weight each shared word by its inverse count.
        preFactor = sum(
            1 / (countCharDict[word] + 1) if word in countCharDict else 1
            for word in sharedWords
        )
    else:
        # No exact match: keep the best fuzzy ratio among word pairs that
        # share their first character.  The similarity function is only
        # needed on this path, so it is resolved here.
        ratio = Levenshtein.ratio
        preFactor = 0
        for qWord in qWithoutSubSet:
            if qWord == '':
                continue
            # The weight depends on the question word only.
            div = countCharDict[qWord] + 1 if qWord in countCharDict else 1
            for preWord in preWords:
                if preWord == '' or qWord[0] != preWord[0]:
                    continue
                preFactor = max(preFactor, ratio(qWord, preWord) / div)

    # preWords is never empty here (re.split yields at least one item),
    # so the union below has at least one element.
    scorePre = preFactor / len(qWithoutSubSet | preWords)
    self.scorePreAll = scorePre
    return scorePre
评论列表
文章目录