def getAllCritics(movieList):
    """Tokenize every critic review and label it by its tomatometer value.

    Args:
        movieList: iterable of mappings, each with a ``"critics"`` entry
            holding an iterable of review mappings. Every review mapping
            is read through its ``"review"`` (text) and ``"tomatometer"``
            keys.

    Returns:
        A list with one ``(tokens, label)`` tuple per review, in input
        order. ``tokens`` is a list of lower-cased, Porter-stemmed word
        tokens with English stop words removed; ``label`` is ``'pos'``
        when the review's ``"tomatometer"`` equals ``"fresh"`` and
        ``'neg'`` otherwise. An input without any review yields ``[]``.
    """
    # Flatten the per-movie critic lists with plain Python so the result
    # does not depend on ``map`` returning a list (it is lazy on Python 3)
    # and so that an empty input is not an error.
    reviews = []
    for movie in movieList:
        reviews.extend(movie["critics"])

    # Nothing to tokenize: skip building the NLTK helpers altogether.
    if not reviews:
        return []

    # Loop-invariant helpers are created once instead of per review/token.
    tokenizer = RegexpTokenizer(r'\w+')
    stemmer = PorterStemmer()
    # A frozenset gives O(1) membership tests instead of re-reading and
    # scanning the stop-word list for every single token.
    englishStopwords = frozenset(stopwords.words('english'))

    tokenizeReview = []
    for review in reviews:
        words = tokenizer.tokenize(review['review'].lower())
        # NOTE(review): stop words are filtered after stemming, exactly as
        # in the original; stemming may alter a stop word so that it no
        # longer matches the list - confirm this order is intended.
        stems = [stemmer.stem(word) for word in words]
        tokens = [stem for stem in stems if stem not in englishStopwords]
        label = 'pos' if review["tomatometer"] == "fresh" else 'neg'
        tokenizeReview.append((tokens, label))
    return tokenizeReview
评论列表
文章目录