python类Word()的实例源码-面圈网

resume_optimizer.py 文件源码项目：resume-optimizer 作者: mhbuehler 项目源码文件源码阅读 18 收藏 0 点赞 0 评论 0

def _suggest_synonyms(self, target_words, words):
        """Suggest target verbs that are close in meaning to the given words.

        Each item of ``target_words`` and ``words`` is indexed at ``[0]`` to
        obtain the word text. Only the first verb synset of each word is
        considered; words with no verb synset are skipped.

        For every word, its first verb synset is compared with the first verb
        synset of every target word using ``path_similarity``. A pair is
        suggested when the similarity lies strictly between 0.4 and 1.0.
        If any target matches the word exactly (similarity of 1.0), no
        suggestion at all is produced for that word, regardless of the order
        in which the targets are listed.

        Returns a list of ``(word_name, target_name)`` tuples, where each name
        is the part of the synset name before the first ``"."``.
        """
        # First verb synset of every target that has one; computed once
        # instead of being re-filtered for every word.
        target_synsets = []
        for target in target_words:
            target_verbs = Word(target[0]).get_synsets(pos=VERB)
            if target_verbs:
                target_synsets.append(target_verbs[0])

        suggestions = []
        for w in words:
            verb_synsets = Word(w[0]).get_synsets(pos=VERB)
            if not verb_synsets:
                continue
            primary = verb_synsets[0]

            # Collect per-word candidates first so that an exact match found
            # late in the target list still suppresses earlier candidates.
            candidates = []
            for target_synset in target_synsets:
                similarity = primary.path_similarity(target_synset)
                if similarity is None:
                    # NOTE(review): the similarity call may yield None when no
                    # path exists; treat that as "not similar" rather than
                    # comparing None with a float.
                    continue
                if similarity == 1.0:
                    # The word already matches a target; suggest nothing.
                    candidates = []
                    break
                if 0.4 < similarity < 1.0:
                    candidates.append((primary.name().split(".")[0],
                                       target_synset.name().split(".")[0]))
            suggestions.extend(candidates)

        return suggestions

entityResolutionAndFilter.py 文件源码项目：SetExpan 作者: mickeystroller 项目源码文件源码阅读 16 收藏 0 点赞 0 评论 0

def resolution(surfaceName):
    """Return the normalized form of an entity surface name.

    The name is split on whitespace; every token is lowercased and then
    lemmatized with ``Word(...).lemmatize()``. The resulting tokens are
    joined back together with single spaces.
    """
    lemmas = []
    for token in surfaceName.split():
        lowered = token.lower()
        lemmas.append(Word(lowered).lemmatize())
    return " ".join(lemmas)

text_parser.py 文件源码项目：mAIcroft 作者: thundergolfer 项目源码文件源码阅读 19 收藏 0 点赞 0 评论 0

def normalize(self, word, tag="N"):
        """Return the lowercased lemma of ``word`` for the given tag.

        The tag prefix selects the part of speech passed to ``lemmatize``:
        ``"V"`` -> VERB, ``"RB"`` -> ADV, ``"J"`` -> ADJ; any other tag
        (including the default ``"N"``) falls back to NOUN.
        """
        if tag.startswith("V"):
            pos = VERB
        elif tag.startswith("RB"):
            pos = ADV
        elif tag.startswith("J"):
            pos = ADJ
        else:
            pos = NOUN

        lemma = Word(word).lemmatize(pos)
        return lemma.lower()

a.py 文件源码项目：air-poems 作者: leoneckert 项目源码文件源码阅读 16 收藏 0 点赞 0 评论 0

def to_files_special_dict(dict_name):
    writer = open("out_files/" + str(dict_name) + ".txt", "w")
    count = 0
    for t in words_by_pos:
        for w in words_by_pos[t]:
            print "\t\t", w
            try:
                writer.write(w)
                writer.write("\n")
                count += 1
            except:
                nevermind = 1
    writer.close()
    print "[+] saved to files. Saved", count, "words."



# rawjson = open("secret_societies_corpora.json").read()  #puts the file as a big string into the variable rawjson
# data = json.loads(rawjson) #json.loads takes a string and turns it into a data structure
# for elem in data:
#   w = elem["name"]
#   pos = tag(w)[-1][1]
#   # print "-"*20
#   # print w, pos
#   add_word(w,pos)

#   if pos.startswith("VB") and Word(w).lemmatize('v') is not w:
#       w = Word(w).lemmatize('v')
#       pos = tag("to " + w)[-1][1]
#       # print "-"*5
#       # print w, pos
#       add_word(w,pos)
#   if pos.startswith("NN") and Word(w).lemmatize('n') is not w:
#       w = Word(w).lemmatize('n')
#       pos = tag(w)[-1][1]
#       # print "-"*5
#       # print w, pos
#       add_word(w,pos)
#   if pos.startswith("JJ") and Word(w).lemmatize('a') is not w:
#       w = Word(w).lemmatize('a')
#       pos = tag("a " + w + " thing")[-2][1]
#       # print "-"*5
#       # print w, pos
#       add_word(w,pos)

a.py 文件源码项目：air-poems 作者: leoneckert 项目源码文件源码阅读 20 收藏 0 点赞 0 评论 0

def to_files_special_dict(dict_name):
    writer = open("out_files/" + str(dict_name) + ".txt", "w")
    count = 0
    for t in words_by_pos:
        for w in words_by_pos[t]:
            print "\t\t", w
            try:
                writer.write(w)
                writer.write("\n")
                count += 1
            except:
                nevermind = 1
    writer.close()
    print "[+] saved to files. Saved", count, "words."



# rawjson = open("secret_societies_corpora.json").read()  #puts the file as a big string into the variable rawjson
# data = json.loads(rawjson) #json.loads takes a string and turns it into a data structure
# for elem in data:
#   w = elem["name"]
#   pos = tag(w)[-1][1]
#   # print "-"*20
#   # print w, pos
#   add_word(w,pos)

#   if pos.startswith("VB") and Word(w).lemmatize('v') is not w:
#       w = Word(w).lemmatize('v')
#       pos = tag("to " + w)[-1][1]
#       # print "-"*5
#       # print w, pos
#       add_word(w,pos)
#   if pos.startswith("NN") and Word(w).lemmatize('n') is not w:
#       w = Word(w).lemmatize('n')
#       pos = tag(w)[-1][1]
#       # print "-"*5
#       # print w, pos
#       add_word(w,pos)
#   if pos.startswith("JJ") and Word(w).lemmatize('a') is not w:
#       w = Word(w).lemmatize('a')
#       pos = tag("a " + w + " thing")[-2][1]
#       # print "-"*5
#       # print w, pos
#       add_word(w,pos)