def wash(fileList):
    """Stem every word of the given files and write the result to disk.

    For each name in ``fileList`` the file ``./washFile/<name>`` is read,
    each line is passed through ``remove_punctuation`` and lower-cased,
    split on whitespace, and every resulting word is stemmed with a
    ``LancasterStemmer``.  The stems are written to
    ``./washFile_stem/<name>``, each followed by a single space; no line
    breaks are written, so the output is one space-separated stream.

    Words that cannot be stemmed or written are printed to stdout and
    skipped so that one bad token does not abort the whole run.

    Args:
        fileList: iterable of file names (relative to ``./washFile/``).
    """
    st = LancasterStemmer()
    for f in tqdm(fileList):
        # Context managers guarantee both handles are closed even when
        # processing a line raises.
        with open('./washFile/' + f, 'r') as fr, \
                open("./washFile_stem/" + f, 'w') as fw:
            for line in fr.read().splitlines():
                line = remove_punctuation(line).lower()
                # NOTE: stop-word / POS filtering was tried in earlier
                # revisions (commented-out code) and is disabled here.
                for x in line.split():
                    try:
                        fw.write(st.stem(x) + ' ')
                    except Exception:
                        # Best effort: report the offending word, keep going.
                        print(x)
评论列表
文章目录