dialogue.py 文件源码-python代码片段

dialogue.py 文件源码

python

阅读 44 收藏 0 点赞 0 评论 0

项目：PyTrafficCar 作者: liyuming1978 项目源码文件源码

def load_data():
    global N, words, labels

    posts = corpus.xml_posts()[:10000]
    freqs = [ FreqDist(post.text) for post in posts ] 
    words = list(set(word 
                    for dist in freqs 
                    for word in dist.keys()
                    if word not in ENGLISH_STOP_WORDS and
                    word not in punctuation))

    labels = list(set([ post.get('class') for post in posts ]))

    data = []
    N = len(words)
    for post, dist in zip(posts, freqs):
        V = Vol(1, 1, N, 0.0)
        for i, word in enumerate(words):
            V.w[i] = dist.freq(word)
        data.append((V, labels.index(post.get('class'))))

    return data