def sample_handling(sample, lexicon, classification):
    """Convert the lines of a text file into bag-of-words count vectors.

    Each processed line is lower-cased, tokenized with ``word_tokenize`` and
    every token is passed through ``lemmatizer.lemmatize``. For each resulting
    word that appears in ``lexicon``, the counter at that word's lexicon
    position is incremented.

    Only the slice ``[:hm_lines]`` of the file's lines is processed.
    ``hm_lines``, ``word_tokenize`` and ``lemmatizer`` are module-level names
    that are not defined in this function (presumably a line limit and the
    NLTK tokenizer / WordNet lemmatizer -- confirm against the module header).

    Args:
        sample: Path of the text file to read; it is opened as UTF-8.
        lexicon: Indexable sequence of hashable words; a word's position in
            this sequence is its position in the feature vector. If a word
            occurs more than once, the first position is used.
        classification: Label stored, unchanged, next to every feature vector.

    Returns:
        A list with one ``[features, classification]`` entry per processed
        line, where ``features`` is a list of ``len(lexicon)`` numeric counts
        (numpy float values, as produced by ``list(np.zeros(...))``).
    """
    # Build the word -> position table once. ``setdefault`` keeps the FIRST
    # position of a duplicated word, which is what ``lexicon.index`` returns.
    # This replaces two linear scans of the lexicon per token with a single
    # constant-time dictionary lookup.
    position_of = {}
    for position, entry in enumerate(lexicon):
        position_of.setdefault(entry, position)

    vector_size = len(lexicon)

    # Read everything up front so the file handle is released before the
    # (potentially slow) tokenizing work starts.
    with io.open(sample, 'r', encoding='utf-8') as f:
        contents = f.readlines()

    featureset = []
    for line in contents[:hm_lines]:
        features = np.zeros(vector_size)
        for token in word_tokenize(line.lower()):
            # The lemma is lower-cased again, as in the original lookup, in
            # case the lemmatizer returns a differently-cased form.
            position = position_of.get(lemmatizer.lemmatize(token).lower())
            if position is not None:
                features[position] += 1
        featureset.append([list(features), classification])
    return featureset
create_sentiment_featuresets.py 文件源码
python
阅读 21
收藏 0
点赞 0
评论 0
评论列表
文章目录