def print_corpus_stats(name, sents, docs, stats):
    """Print a human-readable summary of a corpus to stdout.

    Args:
        name: Label printed as the header line (followed by ``":"``).
        sents: Indexable pair of sequence collections. ``sents[0]`` is
            reported under "topic model" and ``sents[1]`` under
            "language model". Each item must support ``item[2]`` with a
            length; the token count adds ``len(item[2]) - 1`` per item
            (presumably discounting one boundary symbol per sequence --
            TODO confirm against the code that builds ``sents``).
        docs: Indexable; ``len(docs[0])`` is reported as the document count.
        stats: Indexable of six values, read as
            ``[sent mean, sent max, sent min, doc mean, doc max, doc min]``
            (ordering taken from the labels printed below).

    Returns:
        None. All output goes to stdout. A section (and the ``stats``
        entries it reads) is skipped entirely when its sequence collection
        is empty.
    """
    def emit(label, value):
        # Build one string and print it in parentheses: this yields the
        # same text under the Python 2 print statement and the Python 3
        # print function ("label = value", tab-indented).
        print("\t%s = %s" % (label, value))

    print(name + ":")
    emit("no. of docs", len(docs[0]))

    topic_seqs = sents[0]
    if len(topic_seqs) > 0:
        emit("topic model no. of sequences", len(topic_seqs))
        emit("topic model no. of tokens",
             sum(len(item[2]) - 1 for item in topic_seqs))
        emit("original doc mean len", stats[3])
        emit("original doc max len", stats[4])
        emit("original doc min len", stats[5])

    lang_seqs = sents[1]
    if len(lang_seqs) > 0:
        emit("language model no. of sequences", len(lang_seqs))
        emit("language model no. of tokens",
             sum(len(item[2]) - 1 for item in lang_seqs))
        emit("original sent mean len", stats[0])
        emit("original sent max len", stats[1])
        emit("original sent min len", stats[2])
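# Web-page residue from the scraped source, not code: "评论列表" (comment list)
# and "文章目录" (table of contents).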