def hamming_distance_distribution(sentence_length, vocab_size, tau=1.0):
    """Return the RAML payoff distribution over Hamming edit distances.

    For each edit distance e in [0, sentence_length) the unnormalized
    log-weight is

        log C(L, e) + e*log(V) - e/tau - (e/tau)*log(V)

    i.e. the log-count of sequences at Hamming distance e, reweighted by
    the temperature tau.  Based on
    https://gist.github.com/norouzi/8c4d244922fa052fa8ec18d8af52d366

    Parameters
    ----------
    sentence_length : int
        Length L of the target sentence (number of token positions).
    vocab_size : int
        Vocabulary size V used for the substitution count.
    tau : float, optional
        Temperature of the payoff distribution (default 1.0).

    Returns
    -------
    numpy.ndarray
        Array of shape (sentence_length,) summing to 1.
    """
    # Local import keeps the fix self-contained; scipy.misc.comb was
    # removed from SciPy, and gammaln lets us stay in log space so that
    # large binomial coefficients never overflow.
    from scipy.special import gammaln

    c = numpy.zeros(sentence_length)
    log_vocab = numpy.log(vocab_size)
    for edit_dist in range(sentence_length):
        # log C(L, e) computed directly in log space (no overflow).
        log_n_edits = (gammaln(sentence_length + 1)
                       - gammaln(edit_dist + 1)
                       - gammaln(sentence_length - edit_dist + 1))
        c[edit_dist] = log_n_edits + edit_dist * log_vocab
        # Temperature reweighting (identical formula to the original).
        c[edit_dist] -= edit_dist / tau + edit_dist / tau * log_vocab
    # Subtract the max before exponentiating for numerical stability;
    # the shift cancels in the normalization below.
    c = numpy.exp(c - numpy.max(c))
    c /= numpy.sum(c)
    return c
# (stray web-page navigation text removed: "comment list" / "article table of contents")