def get_counts():
    """Populate the global unigram/bigram counters and sentence list.

    Reads each file "Shakespeare_parsed/NNN" for NNN in 1..NUM_FILES
    (skipping indices listed in SKIP), lowercases the tokens of every
    line, appends each non-empty token list to `sentences`, and tallies
    unigram and adjacent-pair (bigram) frequencies into the global
    `unigrams` and `bigrams` counters.
    """
    global unigrams
    global bigrams
    global sentences
    for i in xrange(1, NUM_FILES + 1):
        if i in SKIP:
            continue
        with open("Shakespeare_parsed/%03d" % i) as f:
            for line in f:
                tokens = [t.lower() for t in get_tokens(line)]
                # Skip blank lines before doing any further work.
                # (The original also ran nltk.pos_tag(tokens) here, even
                # for empty lines, but the result was never used — the
                # expensive dead call has been removed.)
                if not tokens:
                    continue
                sentences.append(tokens)
                prev_word = ""
                for token in tokens:
                    unigrams[token] += 1
                    # No bigram for the first token of a line
                    # (prev_word is still the empty-string sentinel).
                    if prev_word:
                        bigrams[(prev_word, token)] += 1
                    prev_word = token
# Ten most frequent unigrams and bigrams.  most_common(10) returns the
# same value as most_common()[:10] but uses a heap (heapq.nlargest)
# internally instead of sorting the entire frequency table.
top10_uni = unigrams.most_common(10)
top10_bi = bigrams.most_common(10)
# (removed non-code page residue: "评论列表" / "文章目录" — i.e. "comment
# list" / "article table of contents", blog navigation labels left over
# from scraping; as bare text they made this file a syntax error)