def extract(filename, key_part=['# ??']):
    """Build a ``Document`` from the line that follows each marker in a file.

    The file content is obtained from ``get_text(filename)``. For every
    marker in ``key_part`` the entry immediately after the marker's first
    occurrence is decoded as UTF-8. Its whitespace-separated tokens are
    counted, and its text followed by one space is appended to the
    accumulated text.

    NOTE(review): the default marker ``'# ??'`` looks mis-encoded in this
    copy of the file; confirm the intended literal against the data files.

    Args:
        filename: Forwarded to ``get_text`` and to ``Document``.
        key_part: Iterable of marker entries to look up. The default list
            is only read here, never mutated.

    Returns:
        ``Document(token_counts, filename, text)``, where ``token_counts``
        maps each token to its number of occurrences.

    Raises:
        ValueError: If a marker is not found (assuming ``get_text`` returns
            a list -- TODO confirm).
        IndexError: If a marker is the last entry, so nothing follows it.
    """
    lines = get_text(filename)
    tokens = []
    text_parts = []
    for marker in key_part:
        # The payload is the entry right after the marker line.
        section = lines[lines.index(marker) + 1].decode('utf-8')
        tokens.extend(section.split())
        text_parts.append(section + ' ')

    # Token -> occurrence count.
    token_counts = {}
    for token in tokens:
        if token in token_counts:
            token_counts[token] += 1
        else:
            token_counts[token] = 1

    return Document(token_counts, filename, ''.join(text_parts))
kmeans_cluster.py 文件源码
python
阅读 20
收藏 0
点赞 0
评论 0
评论列表
文章目录