def text_clean(filename):
    """
    Read a script file and return its words lowercased and lemmatized.

    Input: File path of script. The file is decoded as UTF-8; byte
        sequences that are not valid UTF-8 are silently dropped.
    Output: List of every whitespace-separated token in the script, in file
        order, lowercased and passed through WordNetLemmatizer.lemmatize.

    Note: tokens are split on whitespace only, so punctuation attached to a
    word (e.g. "hello,") stays part of that token; nothing in this function
    strips punctuation.
    """
    # Function-scope import keeps this block self-contained. io.open accepts
    # encoding/errors on both Python 2 and Python 3, unlike the Python 2
    # builtin open().
    import io

    wnl = WordNetLemmatizer()
    # Decode once at the file boundary instead of calling .decode() on each
    # word (str has no .decode on Python 3), and use a context manager so the
    # file handle is closed even if lemmatization raises.
    with io.open(filename, 'r', encoding='utf8', errors='ignore') as script:
        return [
            wnl.lemmatize(word.lower())
            for line in script
            for word in line.split()
        ]
评论列表
文章目录