def _tag_dictionary(src_path, dst_path):
    """Write a part-of-speech tagged copy of a word list.

    Every line of *src_path* (read as UTF-8) is segmented with ``pseg.cut``.
    Each resulting token is rendered as ``word/flag`` and the pieces for one
    input line are concatenated without a separator, then written to
    *dst_path* (UTF-8, truncated first).

    Args:
        src_path: Path of the UTF-8 word list to read, one entry per line.
        dst_path: Path of the tagged file to create or overwrite.

    Raises:
        IOError / OSError: If either file cannot be opened.
        UnicodeDecodeError: If *src_path* is not valid UTF-8.
    """
    # Compiled once per file instead of once per input line.
    # The substitution removes the first "/x" on each output line and keeps
    # whatever follows it on that line ('.' does not cross a newline).
    # NOTE(review): presumably this strips the flag attached to the trailing
    # line-break token -- confirm against the tagger's flag set.
    x_flag = re.compile(r'/x(.*)')

    # codecs.open yields already-decoded text on both Python 2 and Python 3,
    # so no per-line .decode() is needed; `with` guarantees both handles are
    # closed even if tagging a line raises.
    with codecs.open(src_path, "r", encoding="UTF-8") as src, \
            codecs.open(dst_path, "w", encoding="UTF-8") as dst:
        for line in src:
            tagged = "".join(
                tag.word + "/" + tag.flag for tag in pseg.cut(line)
            )
            dst.write(x_flag.sub(r'\1', tagged))


def jieba_cut():
    """Tag the HowNet positive and negative word lists with ``pseg``.

    Reads ``hownet/pos_all_dict.txt`` and ``hownet/neg_all_dict.txt`` and
    writes the tagged results to ``hownet/pos_all_cut.txt`` and
    ``hownet/neg_all_cut.txt`` respectively. Paths are relative to the
    current working directory. Returns ``None``.
    """
    # Positive word list.
    _tag_dictionary("hownet/pos_all_dict.txt", "hownet/pos_all_cut.txt")
    # Negative word list.
    _tag_dictionary("hownet/neg_all_dict.txt", "hownet/neg_all_cut.txt")
# ????????????
评论列表
文章目录