def tag_contexts(doc_id):
    """Print a frequency summary of POS tags for one document's contexts.

    For every context returned by ``get_contexts(doc_id)``, tokenize and
    POS-tag it, then print the 15 most frequent tags, one per line:
    tag frequency, the tag's description from the Penn Treebank tagset,
    and the 10 most common words seen with that tag.

    Relies on module-level ``tags`` (lazily-loaded tagset descriptions),
    ``nltk``, ``get_contexts`` and ``tokenize``.
    """
    global tags
    # Lazy-load the Penn Treebank tag descriptions on first use so the
    # (slow) pickle load is paid only once per process.
    if not tags:
        tags = nltk.data.load("help/tagsets/upenn_tagset.pickle")
    words = defaultdict(Counter)  # tag -> Counter of words seen with that tag
    count = Counter()             # tag -> total occurrences
    for context in get_contexts(doc_id):
        for word, tag in nltk.pos_tag(tokenize(context)):
            words[tag][word] += 1
            count[tag] += 1
    # Top-10 example words per tag, space-joined for display.
    # (A generator expression is used instead of the original
    # zip(*...)[0], which breaks on Python 3 and on empty counters.)
    tag_common_words = {
        tag: ' '.join(word for word, _ in tag_words.most_common(10))
        for tag, tag_words in words.items()
    }
    for tag, freq in count.most_common(15):
        # tags[tag] is (description, examples); show the description only.
        print("%4d\t%45s\t%s" % (freq, tags[tag][0], tag_common_words[tag]))
# NOTE(review): the following two lines ("评论列表" = comment list,
# "文章目录" = article table of contents) are scraped-page navigation
# residue, not source code; kept here commented out so the file parses.
# 评论列表
# 文章目录