def loadDataFromCutFile(self,totalnum):
    """Collect one space-joined keyword string per row of the cut-news file.

    Rows with indices 1 .. totalnum - 1 are requested one at a time through
    ``Cut.getRow`` using ``Global.cutnews_dir`` and ``Global.filesize``.
    Reading stops at the first row that comes back falsy.  Every row is
    parsed as JSON and its ``content`` field is passed to
    ``analyse.extract_tags`` with ``topK=20``; the resulting tags are joined
    with single spaces, printed, and collected.

    :param totalnum: exclusive upper bound of the row indices to read.
    :return: list of the joined keyword strings, in row order.
    """
    reader = Cut()
    documents = []
    for row_index in range(1, totalnum):
        raw_row = reader.getRow(row_index, Global.cutnews_dir, Global.filesize)
        if not raw_row:
            # A falsy row ends reading; later indices are never requested.
            break
        content = json.loads(raw_row)['content']
        joined_tags = " ".join(analyse.extract_tags(content, topK=20))
        # Single-argument parenthesised form prints the same on Python 2 and 3.
        print(joined_tags)
        documents.append(joined_tags)
    return documents
#calculate tf-idf
# Comment list (stray page text, not code)
# Article table of contents (stray page text, not code)