def get_save_wikitext(wiki_filename, text_filename):
    """Dump the articles of a Wikipedia corpus to a plain-text file.

    Each article yielded by ``corpora.WikiCorpus(...).get_texts()`` is
    written to *text_filename* as one line of space-separated tokens.
    Progress is logged every 10000 articles.

    Args:
        wiki_filename: Path of the Wikipedia dump handed to ``WikiCorpus``.
        text_filename: Path of the text file to create (overwritten if it
            already exists).
    """
    # An empty dictionary is passed so WikiCorpus does not build its own;
    # presumably this avoids an extra scan of the dump -- TODO confirm
    # against the installed gensim version.
    wiki = corpora.WikiCorpus(wiki_filename, lemmatize=False, dictionary={})

    # NOTE(review): the file is opened with the platform default encoding,
    # as before. Wikipedia text is not ASCII-only, so consider an explicit
    # encoding="utf-8" once the target Python version is confirmed.
    # The ``with`` block guarantees the file is closed even if iteration or
    # a write fails part-way through.
    with open(text_filename, 'w') as output:
        # Start counting at 1 so ``count`` is the number of articles saved.
        # (The previous code incremented an uninitialised counter and
        # failed on the first article.)
        for count, text in enumerate(wiki.get_texts(), 1):
            # Tokens are written exactly as produced; the stop-word filter
            # that used to be sketched here was never enabled.
            output.write(" ".join(text) + "\n")
            if count % 10000 == 0:
                logging.info("Saved %d articles", count)
评论列表
文章目录