def most_distinctive_terms(self, fieldname, number=5, prefix=''):
    """Return the ``number`` best terms of a field ranked by `tf*idf`.

    Every ``(text, terminfo)`` pair produced by
    ``self.iter_prefix(fieldname, prefix)`` is scored as
    ``terminfo.weight() * log(doc_count / terminfo.doc_frequency())``.

    The result is a list of ``(score, text)`` tuples, highest score first,
    containing at most ``number`` entries.
    """
    # Held as a float so the ratio below is never an integer division.
    total_docs = float(self.doc_count())
    entries = self.iter_prefix(fieldname, prefix)

    def scored_terms():
        # Lazily pair each term with its score; nlargest keeps only the
        # top entries, so the full term list is never materialized.
        for term_text, info in entries:
            term_weight = info.weight()
            inverse_doc_freq = log(total_docs / info.doc_frequency())
            yield term_weight * inverse_doc_freq, term_text

    return nlargest(number, scored_terms())
评论列表
文章目录