def extract(lines, filename, key_part=('# ??',)):
    """Count the words on the line that follows each marker in ``key_part``.

    For every marker, the first entry of ``lines`` equal to that marker is
    located and the entry immediately after it is split on whitespace.  The
    words collected for all markers are tallied into a ``{word: count}``
    dict, which is wrapped in a ``Document`` together with ``filename``.

    Args:
        lines: Sequence of lines supporting ``.index()`` and integer
            indexing (presumably a list of text or UTF-8 byte strings --
            confirm against the caller).
        filename: Passed through unchanged to ``Document``.
        key_part: Iterable of marker lines to search for.

    Returns:
        ``Document(words_dict, filename)``.

    A marker that cannot be processed (not present, last line of the input,
    undecodable bytes, ...) is reported on stdout and skipped, so the result
    may be built from fewer markers than requested.
    """
    words = []
    for key in key_part:
        try:
            index = lines.index(key)
            print(index)
            line = lines[index + 1]
            # Decode only raw byte strings; a line that is already text is
            # used as-is instead of failing inside .decode().
            if isinstance(line, bytes):
                line = line.decode('utf-8')
            words.extend(line.split())
        except Exception as e:
            # Best-effort: report the problem and carry on with the next marker.
            print(e)

    # Plain dict (not a Counter) so Document receives exactly the type it did before.
    words_dict = {}
    for w in words:
        words_dict[w] = words_dict.get(w, 0) + 1
    return Document(words_dict, filename)
cal_similarity2.py 文件源码
python
阅读 18
收藏 0
点赞 0
评论 0
评论列表
文章目录