def print(self, f, format='counts'):
    """Write the translation candidates of every source key to ``f``.

    Each entry of ``self.count_dict`` maps a source key to a mapping of
    candidate -> count. Candidates are sorted by descending count,
    truncated to ``self.top_n`` when that is set (truthy), and written as
    one line per key. Keys left without any candidate are skipped.

    Two modes are distinguished:

    * ``self.source_tf`` is empty (no tf/df counts - dictionary read from
      file): every key is written and no filtering is applied.
    * otherwise a key is written only when its term frequency is at least
      ``self.source_tf_filter`` and its document-frequency ratio
      (``self.source_df[key] / self.count_docs``) is at most
      ``self.source_df_filter``; candidates rejected by
      ``self._filtered_trans`` are dropped.

    Args:
        f: Object with a ``write`` method accepting text.
        format: ``'counts'`` writes ``key<TAB>`` followed by the
            space-joined ``self._format(candidate, count)`` strings;
            ``'solr'`` writes ``key => <highest-count candidate>``.
            Any other value writes nothing.
    """
    # Decide this once, *before* any ``self.source_tf[key]`` lookup: with a
    # plain dict the lookup would raise KeyError, and with a defaultdict it
    # would insert the key and make the mapping look non-empty afterwards.
    has_source_counts = len(self.source_tf) > 0

    for key, counts in self.count_dict.items():
        if not has_source_counts:
            # no tf/df counts - dictionary read from file
            candidates = sorted(counts.items(), key=itemgetter(1), reverse=True)
        elif (self.source_tf[key] >= self.source_tf_filter and
              self.source_df[key] / float(self.count_docs) <= self.source_df_filter):
            candidates = sorted(
                ((v, c) for v, c in counts.items() if not self._filtered_trans(v)),
                key=itemgetter(1),
                reverse=True,
            )
        else:
            # Source key rejected by the tf/df filters.
            continue

        if self.top_n:
            candidates = candidates[:self.top_n]
        if not candidates:
            continue

        if format == 'counts':
            f.write(u'%s\t%s\n' % (key, ' '.join([self._format(v, c) for v, c in candidates])))
        elif format == 'solr':
            # Only the best-scoring candidate is emitted in this format.
            f.write(u'%s => %s\n' % (key, candidates[0][0]))
评论列表
文章目录