def _suggest_synonyms(self, target_words, words):
    """
    Suggest target verbs that are close in meaning to the given words.

    Every item of ``target_words`` and ``words`` is indexed with ``[0]`` to
    get the word text; any further elements are not used here. Only the
    first verb synset returned by ``Word(...).get_synsets(pos=VERB)`` is
    compared, and words without a verb synset are skipped.

    Returns:
        A list of ``(word_lemma, target_lemma)`` tuples, one for each pair
        whose path similarity lies strictly between 0.4 and 1.0. A word
        that matches any target exactly (similarity 1.0) produces no
        suggestions at all, regardless of the order of ``target_words``.
    """
    # Loop-invariant: first verb synset of every target that has one.
    target_synsets = []
    for target in target_words:
        synsets = Word(target[0]).get_synsets(pos=VERB)
        if synsets:
            target_synsets.append(synsets[0])

    suggestions = []
    for w in words:
        synsets = Word(w[0]).get_synsets(pos=VERB)
        if not synsets:
            continue
        word_synset = synsets[0]
        word_lemma = word_synset.name().split(".")[0]

        # Collect near matches first and only keep them once we know that
        # no target is an exact match; appending eagerly made the result
        # depend on where the exact match sat in ``target_words``.
        candidates = []
        exact_match = False
        for target_synset in target_synsets:
            similarity = word_synset.path_similarity(target_synset)
            if similarity is None:
                # No connecting path between the two synsets; comparing
                # None with floats raises TypeError on Python 3.
                continue
            if similarity == 1.0:
                exact_match = True
                break
            if 0.4 < similarity < 1.0:
                target_lemma = target_synset.name().split(".")[0]
                candidates.append((word_lemma, target_lemma))
        if not exact_match:
            suggestions.extend(candidates)
    return suggestions
# Example source code using the Python class Word()
def resolution(surfaceName):
    '''
    Normalize the surface name of an entity.

    input: a surface name of entity
    output: the "normalized" entity name
    process: the name is split on whitespace; each token is
             1) lowercased
             2) lemmatized with Word(...).lemmatize() (no POS argument)
             and the results are joined back with single spaces.
    '''
    lemmas = []
    for token in surfaceName.split():
        lowered = token.lower()
        lemmas.append(Word(lowered).lemmatize())
    return " ".join(lemmas)
def normalize(self, word, tag="N"):
    """
    Lemmatize ``word`` for the part of speech implied by ``tag`` and
    return the lemma in lowercase.

    The tag is matched by prefix: "V" selects VERB, "RB" selects ADV and
    "J" selects ADJ. Any other tag, including the default "N", falls back
    to NOUN. (The prefixes look like Penn Treebank tags -- confirm with
    the caller.)
    """
    prefix_to_pos = (("V", VERB), ("RB", ADV), ("J", ADJ))
    pos = NOUN
    for prefix, candidate in prefix_to_pos:
        if tag.startswith(prefix):
            pos = candidate
            break
    lemma = Word(word).lemmatize(pos)
    return lemma.lower()
def to_files_special_dict(dict_name):
    """
    Write every word held in the module-level ``words_by_pos`` to a file.

    The output file is ``out_files/<dict_name>.txt`` and receives one word
    per line. Each word is echoed to stdout (prefixed by two tabs) before
    the write is attempted, and a summary line with the number of words
    actually written is printed at the end.

    Args:
        dict_name: Base name of the output file; converted with ``str()``.

    Returns:
        None.
    """
    count = 0
    # ``with`` closes the file even if printing or iteration raises.
    with open("out_files/" + str(dict_name) + ".txt", "w") as writer:
        for t in words_by_pos:
            for w in words_by_pos[t]:
                # Single-argument print() emits the same text on Python 2
                # and Python 3 as the old ``print "\t\t", w`` statement.
                print("\t\t%s" % (w,))
                try:
                    writer.write(w)
                    writer.write("\n")
                except (UnicodeError, TypeError):
                    # Best effort: skip words that cannot be encoded or
                    # are not text, but let real I/O failures and
                    # KeyboardInterrupt propagate instead of hiding them.
                    continue
                count += 1
    print("[+] saved to files. Saved %s words." % (count,))
# rawjson = open("secret_societies_corpora.json").read() #puts the file as a big string into the variable rawjson
# data = json.loads(rawjson) #json.loads take a string and turns it into a data structure
# for elem in data:
# w = elem["name"]
# pos = tag(w)[-1][1]
# # print "-"*20
# # print w, pos
# add_word(w,pos)
# if pos.startswith("VB") and Word(w).lemmatize('v') is not w:
# w = Word(w).lemmatize('v')
# pos = tag("to " + w)[-1][1]
# # print "-"*5
# # print w, pos
# add_word(w,pos)
# if pos.startswith("NN") and Word(w).lemmatize('n') is not w:
# w = Word(w).lemmatize('n')
# pos = tag(w)[-1][1]
# # print "-"*5
# # print w, pos
# add_word(w,pos)
# if pos.startswith("JJ") and Word(w).lemmatize('a') is not w:
# w = Word(w).lemmatize('a')
# pos = tag("a " + w + " thing")[-2][1]
# # print "-"*5
# # print w, pos
# add_word(w,pos)
def to_files_special_dict(dict_name):
    """
    Write every word held in the module-level ``words_by_pos`` to a file.

    The output file is ``out_files/<dict_name>.txt`` and receives one word
    per line. Each word is echoed to stdout (prefixed by two tabs) before
    the write is attempted, and a summary line with the number of words
    actually written is printed at the end.

    Args:
        dict_name: Base name of the output file; converted with ``str()``.

    Returns:
        None.
    """
    count = 0
    # ``with`` closes the file even if printing or iteration raises.
    with open("out_files/" + str(dict_name) + ".txt", "w") as writer:
        for t in words_by_pos:
            for w in words_by_pos[t]:
                # Single-argument print() emits the same text on Python 2
                # and Python 3 as the old ``print "\t\t", w`` statement.
                print("\t\t%s" % (w,))
                try:
                    writer.write(w)
                    writer.write("\n")
                except (UnicodeError, TypeError):
                    # Best effort: skip words that cannot be encoded or
                    # are not text, but let real I/O failures and
                    # KeyboardInterrupt propagate instead of hiding them.
                    continue
                count += 1
    print("[+] saved to files. Saved %s words." % (count,))
# rawjson = open("secret_societies_corpora.json").read() #puts the file as a big string into the variable rawjson
# data = json.loads(rawjson) #json.loads take a string and turns it into a data structure
# for elem in data:
# w = elem["name"]
# pos = tag(w)[-1][1]
# # print "-"*20
# # print w, pos
# add_word(w,pos)
# if pos.startswith("VB") and Word(w).lemmatize('v') is not w:
# w = Word(w).lemmatize('v')
# pos = tag("to " + w)[-1][1]
# # print "-"*5
# # print w, pos
# add_word(w,pos)
# if pos.startswith("NN") and Word(w).lemmatize('n') is not w:
# w = Word(w).lemmatize('n')
# pos = tag(w)[-1][1]
# # print "-"*5
# # print w, pos
# add_word(w,pos)
# if pos.startswith("JJ") and Word(w).lemmatize('a') is not w:
# w = Word(w).lemmatize('a')
# pos = tag("a " + w + " thing")[-2][1]
# # print "-"*5
# # print w, pos
# add_word(w,pos)