def statistics_by_aspect():
    """Print per-aspect word-frequency and sample-count statistics.

    Reads labelled samples from ``aspects_train.csv`` via
    ``get_samples_stream`` (presumably yielding ``(aspect, words)`` pairs --
    TODO confirm against its definition), then reports for each aspect the
    20 most common words, and finally each aspect's sample count and
    percentage share of the corpus.
    """
    filename = "aspects_train.csv"
    words_dist = nltk.ConditionalFreqDist()
    sample_sizes = nltk.FreqDist()
    samples_stream = get_samples_stream(filename)
    for aspect, words in samples_stream:
        sample_sizes[aspect] += 1
        for word in words:
            words_dist[aspect][word] += 1

    # .iteritems() / print-statements are Python-2 only; .items() and
    # print(...) behave the same here and also run under Python 3.
    for category, dist in words_dist.items():
        print("\n------- Category: {}".format(category))
        print(dist.most_common(20))

    total_samples = sample_sizes.N()
    print("\ntotally {} samples".format(total_samples))
    for aspect, count in sample_sizes.items():
        print("aspect[{}] has {} samples, {:.2f}%".format(
            aspect, count, count * 100.0 / total_samples))
Examples using the Python class nltk.ConditionalFreqDist()
def findtags(self, tag_prefix, tagged_text):
    """Collect the most common words for every tag matching a prefix.

    :param tag_prefix: The tag prefix
    :type tag_prefix: ``str``
    :param tagged_text: The text to search, as ``(word, tag)`` pairs
    :type tagged_text: ``list``
    :return: mapping of each matching tag to its 50 most common
        ``(word, count)`` pairs
    """
    # Swap each pair to (tag, word) so the tag becomes the CFD condition,
    # keeping only tags that start with the requested prefix.
    matching_pairs = ((tag, word) for (word, tag) in tagged_text
                      if tag.startswith(tag_prefix))
    freq_by_tag = nltk.ConditionalFreqDist(matching_pairs)
    return {tag: freq_by_tag[tag].most_common(50)
            for tag in freq_by_tag.conditions()}
classifier.py — file source code
Project: Neural-Learner-for-English-Language-Test
Author: taineleau
Project source
File source
Views: 23
Favorites: 0
Likes: 0
Comments: 0
def ngram_baseline(text):
    """Build a conditional frequency distribution over filtered bigrams.

    Consecutive token pairs are taken from *text*; a bigram
    ``(first, second)`` is kept only when ``second[1] == 1`` (each token is
    assumed to be an indexable pair such as ``(word, flag)`` -- TODO confirm
    against the caller).  The resulting CFD conditions on ``first[0]`` and
    counts occurrences of ``second[0]``.

    :param text: iterable of indexable 2-element tokens
    :return: ``nltk.ConditionalFreqDist`` over the kept word pairs
    """
    # Dead commented-out debug loops and unused counters removed;
    # the filter-and-project loop is now a single comprehension.
    refine = [(first[0], second[0])
              for first, second in ngrams(text, 2)
              if second[1] == 1]
    return nltk.ConditionalFreqDist(refine)
def calc_cfd(doc):
    """Conditional frequency distribution of word bigrams in *doc*.

    Tokenises *doc* with Mecab POS tagging, keeps only the surface words
    (dropping the tags), and counts each bigram (w1 -> w2).
    """
    surface_words = (word for word, _tag in Mecab().pos(doc))
    return nltk.ConditionalFreqDist(nltk.bigrams(surface_words))
def generate_from_trigrams(lm, start_words, n_words):
    """Generate text with a back-off trigram language model.

    Falls back trigram -> bigram -> unigram whenever the current context
    has not been seen.

    :param lm: language model; ``lm.lowercase_tokens`` must be nonempty
    :param start_words: list of two seed strings
    :param n_words: integer >= 0, number of words to generate,
        not including ``start_words``
    :return: the seed words plus generated words, joined by single spaces
    """
    # Probability maps for each back-off level.
    trigram_prob = trigram_prob_map(Counter(ngrams(lm.lowercase_tokens, 3)))
    bigram_prob = bigram_prob_map(
        nltk.ConditionalFreqDist(ngrams(lm.lowercase_tokens, 2)))
    unigram_prob = unigram_prob_map(Counter(lm.lowercase_tokens))

    def _draw(prob_map):
        # Sample one word from a {word: probability} map.  numpy's choice
        # needs a real sequence: under Python 3, dict.keys() is a view and
        # raises "a must be 1-dimensional", so materialize it first.
        candidates = list(prob_map)
        return choice(candidates, p=[prob_map[w] for w in candidates])

    w1, w2 = start_words[0], start_words[1]
    words = [w1, w2]
    for _ in range(n_words):
        if (w1, w2) in trigram_prob:      # trigram context was seen
            next_word = _draw(trigram_prob[(w1, w2)])
        elif w2 in bigram_prob:           # back off to the bigram model
            next_word = _draw(bigram_prob[w2])
        else:                             # back off to unigrams
            next_word = _draw(unigram_prob)
        # Slide the context window and record the new word.
        w1, w2 = w2, next_word
        words.append(w2)
    return ' '.join(words)