def _read_word_list(path):
    """Read the UTF-8 text file at *path* and return its lines as a list.

    The text is split on newline characters only, so a file that ends with
    a newline yields a final empty string, exactly as a plain
    ``read().split()`` on the newline character would.
    """
    # The context manager closes the handle even if reading or decoding fails.
    with codecs.open(path, 'rb', encoding='utf-8') as handle:
        return handle.read().split('\n')


def extract_dictionary_feature(file_name, col_tag=0, col_content=1):
    """Build dictionary-based features and labels from an Excel sheet.

    Loads four word lists from ``./data/vocabulary`` (``adv.txt``,
    ``inverse.txt``, ``negdict.txt``, ``posdict.txt``), reads *file_name*
    with ``pd.read_excel(..., header=None)``, tokenizes every cell of the
    content column with ``psg.lcut`` and drops tokens whose ``word`` is in
    the module-level ``stopwords``. The remaining tokens are passed to
    ``reviews2matrix`` together with the word lists, and that result is
    passed to ``matrix2vec``.

    Args:
        file_name: Path of the Excel file to read. The sheet is read
            without a header row.
        col_tag: Column label of the target/label column (default 0).
        col_content: Column label of the text column (default 1).

    Returns:
        A tuple ``(x, y)`` where ``x`` is whatever ``matrix2vec`` returns
        and ``y`` is the label column converted to a plain list.
    """
    vocabulary_dir = './data/vocabulary/'
    adv = _read_word_list(vocabulary_dir + 'adv.txt')
    inverse = _read_word_list(vocabulary_dir + 'inverse.txt')
    negdict = _read_word_list(vocabulary_dir + 'negdict.txt')
    posdict = _read_word_list(vocabulary_dir + 'posdict.txt')

    contents = pd.read_excel(file_name, header=None)

    # Parenthesized form prints the same text on Python 2 and Python 3.
    print('cut words...')

    def cut_words(text):
        # NOTE(review): psg is presumably jieba.posseg, whose tokens expose
        # a `.word` attribute -- confirm against the file's imports.
        return [pair for pair in psg.lcut(text) if pair.word not in stopwords]

    token_pairs = list(contents[col_content].apply(cut_words))

    matrix = reviews2matrix(token_pairs, posdict, negdict, inverse, adv)
    x = matrix2vec(matrix)
    y = list(contents[col_tag])
    return x, y
extract_feature.py 文件源码
python
阅读 27
收藏 0
点赞 0
评论 0
评论列表
文章目录