def generate_from_trigrams(lm, start_words, n_words):
    """Generate text with a trigram -> bigram -> unigram back-off model.

    Starting from the two seed words, each new word is sampled from the
    most specific distribution that has an entry for the current context:
    the trigram map keyed by ``(w1, w2)``, else the bigram map keyed by
    ``w2``, else the unigram map.

    Args:
        lm: object exposing ``lowercase_tokens``; the token sequence must
            be non-empty.
        start_words: sequence whose first two items are the seed words
            (strings). Any further items are ignored.
        n_words: integer >= 0, number of words to generate, not counting
            the two seed words.

    Returns:
        The two seed words followed by the generated words, joined by
        single spaces.

    Raises:
        IndexError: if ``start_words`` has fewer than two items.
    """
    # Create probability maps.
    # NOTE(review): the *_prob_map helpers are defined outside this block;
    # the code below assumes trigram_prob[(w1, w2)] and bigram_prob[w2] are
    # {word: probability} mappings and unigram_prob is one such mapping --
    # confirm against their definitions.
    trigram_counter = Counter(ngrams(lm.lowercase_tokens, 3))
    trigram_prob = trigram_prob_map(trigram_counter)
    bigram_cfd = nltk.ConditionalFreqDist(ngrams(lm.lowercase_tokens, 2))
    bigram_prob = bigram_prob_map(bigram_cfd)
    unigram_counter = Counter(lm.lowercase_tokens)
    unigram_prob = unigram_prob_map(unigram_counter)

    # Build sentence
    w1, w2 = start_words[0], start_words[1]
    words = [w1, w2]
    for _ in range(n_words):
        # Back off to the most specific model that knows the context.
        if (w1, w2) in trigram_prob:
            prob_map = trigram_prob[(w1, w2)]
        elif w2 in bigram_prob:
            prob_map = bigram_prob[w2]
        else:
            prob_map = unigram_prob

        # Materialise the keys: on Python 3 ``dict.keys()`` is a view, which
        # ``choice`` cannot treat as a 1-D population. Building the list once
        # also guarantees the weights line up with the candidates.
        candidates = list(prob_map.keys())
        weights = [prob_map[w] for w in candidates]
        next_word = choice(candidates, p=weights)

        # Slide the two-word context window forward.
        w1, w2 = w2, next_word
        words.append(w2)

    return ' '.join(words)
language_model.py 文件源码
python
阅读 28
收藏 0
点赞 0
评论 0
评论列表
文章目录