def post_desc_counter():
    """Count token frequencies in the post-requirement text and save them.

    Reads ``data/post_require.txt`` (UTF-8), loads the custom jieba
    dictionary ``data/user_dict.txt``, segments the text with jieba
    (``cut_all=False``) and counts how many times each token occurs.
    The ``(token, count)`` pairs, sorted by descending count, are
    pretty-printed to stdout and written to
    ``data/post_pre_desc_counter.csv``.

    Returns:
        None. The results are only printed and written to the CSV file.
    """
    # Function-scope import so the block does not depend on a new
    # file-level import.
    from collections import Counter

    # A context manager guarantees the input handle is closed.
    source_path = os.path.join("data", "post_require.txt")
    with open(source_path, "r", encoding="utf-8") as post_file:
        post = post_file.read()

    # Alternative tokenizer (thulac), kept for reference:
    # import thulac
    # thu = thulac.thulac(seg_only=True)
    # thu.cut(post, text=True)

    # Segment with jieba, using the project's custom dictionary.
    file_path = os.path.join("data", "user_dict.txt")
    jieba.load_userdict(file_path)
    seg_list = jieba.cut(post, cut_all=False)

    # Counter starts each token at 0, so the first occurrence counts as 1
    # (the previous ``counter.get(seg, 1) + 1`` inflated every count by one).
    counter = Counter(seg_list)

    # most_common() sorts by count, descending; ties keep first-seen order.
    counter_sort = counter.most_common()
    pprint(counter_sort)

    # newline="" is required by the csv module to avoid blank rows on
    # platforms that translate line endings.
    output_path = os.path.join("data", "post_pre_desc_counter.csv")
    with open(output_path, "w", encoding="utf-8", newline="") as f:
        f_csv = csv.writer(f)
        f_csv.writerows(counter_sort)
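# Comment list ("评论列表") - web-page navigation text, not code
# Article table of contents ("文章目录") - web-page navigation text, not code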