# PreSignature.py -- source file, Python code snippet

def POStagging(self):
        """Reduce every sentence of the entity-signature file to its lemmatized nouns and verbs.

        Reads ``../file/entity_signature.txt`` line by line and writes the
        result to ``../file/pos_signature.txt``.  Three kinds of input line
        are distinguished:

        * a line containing ``***`` is a header.  It is copied verbatim and
          parsed as ``<a>.<first>***<second> <b>``; ``<first>`` and
          ``<second>`` are remembered for the sentences that follow
          (presumably the two entity names of a pair -- confirm against the
          code that produces the input file).
        * a line containing ``------`` is copied verbatim.
        * any other line must look like ``<prefix>:<sentence>``.  The prefix
          and the colon are copied; the sentence is tokenized and POS-tagged
          with NLTK.  The tokens ``Entity1`` / ``Entity2`` are written as
          ``#<first># `` / ``#<second># ``.  Of the remaining tokens only
          those whose tag starts with ``V`` or ``N`` -- but not ``NNP`` --
          are kept, each passed through ``WordNetLemmatizer.lemmatize`` and
          followed by a single space.  The output line ends with a newline.

        Raises:
            ValueError: if a header line does not contain exactly one ``.``,
                exactly two whitespace-separated fields after it, or exactly
                one ``***`` in the first of those fields; or if a sentence
                line contains no ``:``.
            NameError: (``UnboundLocalError``) if a sentence line uses
                ``Entity1`` / ``Entity2`` before any header line was read.
        """
        lemmatizer = WordNetLemmatizer()

        # ``with`` guarantees both files are closed (and the output flushed)
        # even when a malformed line or an NLTK error aborts the run.
        with open('../file/entity_signature.txt', 'r') as fin, \
                open('../file/pos_signature.txt', 'w+') as fout:
            for line in fin:
                if '***' in line:
                    fout.write(line)
                    # The unpacking is deliberately strict so that a header
                    # with an unexpected shape fails loudly instead of being
                    # silently mis-parsed.
                    _, pair_field = line.split('.')
                    pair, _ = pair_field.split()
                    pro1, pro2 = pair.split('***')
                    continue

                if '------' in line:
                    fout.write(line)
                    continue

                # Sentence line: only the first ':' separates prefix and text,
                # so colons inside the sentence itself are preserved.
                prefix, sentence = line.split(':', 1)

                kept = []
                # Tag with the default NLTK English tagger.
                for word, tag in nltk.pos_tag(nltk.word_tokenize(sentence)):
                    if word == 'Entity1':
                        kept.append('#' + pro1 + '# ')
                    elif word == 'Entity2':
                        kept.append('#' + pro2 + '# ')
                    elif tag.startswith(('V', 'N')) and not tag.startswith('NNP'):
                        # Verbs and nouns only; tags starting with NNP are
                        # dropped.
                        kept.append(lemmatizer.lemmatize(word) + ' ')

                fout.write(prefix + ':' + ''.join(kept) + '\n')