def CleanReVerb(self):
    """Write cleaned, stemmed copies of the ReVerb seed and signature files.

    Reads ``../file/seed_ReVerb.txt`` and ``../file/signature_ReVerb.txt``
    and writes ``../file/seed_ReVerb_clean.txt`` and
    ``../file/signature_ReVerb_clean.txt`` respectively. For each input line:

    * a line containing ``'***'`` is copied to the output unchanged;
    * any other line is split at its first ``':'`` into a mark and a text
      part; the text is passed through ``self.CleanStopWords`` (presumably
      stop-word removal -- confirm against that method), split on
      whitespace, and every token is stemmed with NLTK's English Snowball
      stemmer. If at least one token remains, ``mark:stemmed tokens`` is
      written followed by a newline; otherwise the line is dropped.

    Raises:
        ValueError: if a line without ``'***'`` contains no ``':'``
            (the two-way unpacking of the split fails).
        OSError: if any of the four files cannot be opened.
    """
    seed_in = '../file/seed_ReVerb.txt'
    seed_out = '../file/seed_ReVerb_clean.txt'
    signature_in = '../file/signature_ReVerb.txt'
    signature_out = '../file/signature_ReVerb_clean.txt'

    # The stemmer is built from a constant argument, so one instance is
    # shared by every line of both files instead of rebuilding it per line.
    stemmer = nltk.stem.SnowballStemmer('english')

    def clean_stream(fin, fout):
        # Apply the per-line cleaning rule to every line of fin, writing to fout.
        for raw in fin:
            if '***' in raw:
                # Marker lines pass through untouched (including their newline).
                fout.write(raw)
                continue
            mark, text = raw.split(':', 1)
            stems = [stemmer.stem(tok)
                     for tok in self.CleanStopWords(text).split()]
            # Lines whose text is empty after cleaning are skipped entirely.
            if stems:
                fout.write(mark + ':' + ' '.join(stems) + '\n')

    # All four files are opened up front, in the same order as before, and
    # the context managers guarantee each one is closed (and output flushed)
    # even if processing raises part-way through.
    with open(seed_in, 'r') as fin_seed, \
            open(seed_out, 'w+') as fout_seed:
        with open(signature_in, 'r') as fin_signature, \
                open(signature_out, 'w+') as fout_signature:
            clean_stream(fin_seed, fout_seed)
            clean_stream(fin_signature, fout_signature)
# (Web-page residue from the original listing: "Comment list" / "Table of contents")