def appendWordNetStemmingDict(inputPath='stemmingDict.old', outputPath='stemmingDict',
                              outputEncoding='utf8', wordnetPath='wordnet.map'):
    """Merge a WordNet word map into an existing stemming dictionary.

    The existing dictionary is read from ``inputPath`` as JSON. Each key maps
    to a list of entries whose first element (``entry[0]``) is a word; any
    score stored alongside it is discarded and recomputed. Every line of
    ``wordnetPath`` that has at least two whitespace-separated tokens adds
    its second token to the word list of its first token. Each word is then
    paired with ``Levenshtein.ratio(word, key)``.

    Two files are written:

    * ``outputPath`` -- the merged dictionary as JSON (always UTF-8).
    * ``outputPath + '.txt'`` -- one ``key + ' ' + str(entries)`` line per
      key, encoded with ``outputEncoding``.

    Args:
        inputPath: Path of the JSON dictionary to extend.
        outputPath: Path of the JSON file to write; the text dump goes to
            the same path with ``.txt`` appended.
        outputEncoding: Encoding of the ``.txt`` dump only.
        wordnetPath: Path of the WordNet map file. Defaults to the
            previously hard-coded ``'wordnet.map'``.

    Notes:
        * Lines of the map file with fewer than two tokens (including blank
          lines) are skipped instead of raising ``IndexError``.
        * Words that are not ``str`` are reported with ``print`` and kept
          unscored; the function no longer pauses on ``input()`` for them.
        * Word order in the output is the insertion order (existing entries
          first, then WordNet additions), so repeated runs produce
          identical files.
    """
    distance = Levenshtein.ratio

    with open(inputPath, 'r', encoding='utf8') as fin:
        oldDict = json.load(fin)

    # Keep only the word of each stored entry. A dict with dummy values is
    # used as an insertion-ordered set: it de-duplicates like a set while
    # keeping the output order reproducible.
    merged = {}
    for key, entries in oldDict.items():
        merged[key] = dict.fromkeys(entry[0] for entry in entries)

    with open(wordnetPath, 'r', encoding='utf8') as fi:
        for line in fi:
            # split() with no argument tolerates tabs and repeated spaces and
            # yields an empty list for blank lines.
            fields = line.split()
            if len(fields) < 2:
                continue
            merged.setdefault(fields[0], {})[fields[1]] = None

    result = {}
    for key, words in merged.items():
        scored = []
        for word in words:
            if isinstance(word, str):
                scored.append([word, distance(word, key)])
            else:
                # Cannot score a non-string entry; report it and keep it as-is.
                print(list(words))
                scored.append(word)
        result[key] = scored

    # Output files are opened only after all input has been processed, so a
    # failure while reading does not leave a truncated output file behind.
    with open(outputPath, 'w', encoding='utf8') as fo:
        json.dump(result, fo)

    with open(outputPath + '.txt', 'w', encoding=outputEncoding) as fotxt:
        for key, scored in result.items():
            fotxt.write(key + ' ' + str(scored) + '\n')
##
##print('Dumping stemming mapping to json format......')
##generateStemmingDict()
##appendWordNetStemmingDict()
##print('Done!')
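## 评论列表 (comment list) -- web-page residue, not part of the script
## 文章目录 (article table of contents) -- web-page residue, not part of the script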