def statistics_by_aspect(filename="aspects_train.csv", top_n=20):
    """Print word-frequency and sample-count statistics grouped by aspect.

    Reads ``(aspect, words)`` pairs from ``get_samples_stream(filename)`` and
    prints, in order:

    1. for every aspect, its ``top_n`` most frequent words with their counts;
    2. the total number of samples;
    3. for every aspect, its sample count and its percentage of the total.

    Args:
        filename: Path handed to ``get_samples_stream``. Defaults to the
            previously hard-coded ``"aspects_train.csv"``.
            NOTE(review): the file format is whatever ``get_samples_stream``
            expects -- confirm against that helper.
        top_n: How many of the most common words to print per aspect.
            Defaults to the previously hard-coded ``20``.

    Returns:
        None. All results are written to standard output.
    """
    words_dist = nltk.ConditionalFreqDist()  # aspect -> FreqDist of its words
    sample_sizes = nltk.FreqDist()           # aspect -> number of samples

    for aspect, words in get_samples_stream(filename):
        sample_sizes[aspect] += 1
        for word in words:
            words_dist[aspect][word] += 1

    # Single-argument print(...) and .items() behave the same on Python 2
    # and Python 3, so the report no longer depends on Python 2-only syntax.
    for category, dist in words_dist.items():
        print("\n------- Category: {}".format(category))
        print(dist.most_common(top_n))

    total_samples = sample_sizes.N()
    print("\ntotally {} samples".format(total_samples))

    # With zero samples this loop body never runs, so the division below
    # cannot hit a zero denominator.
    for aspect, count in sample_sizes.items():
        share = count * 100.0 / total_samples
        print("aspect[{}] has {} samples, {:.2f}%".format(aspect, count, share))
# Web-page navigation residue (not part of the program): "Comment list", "Table of contents".