def from_words(cls, words, window_size=2):
"""Construct a BigramCollocationFinder for all bigrams in the given
sequence. When window_size > 2, count non-contiguous bigrams, in the
style of Church and Hanks's (1990) association ratio.
"""
wfd = FreqDist()
bfd = FreqDist()
if window_size < 2:
raise ValueError("Specify window_size at least 2")
for window in ngrams(words, window_size, pad_right=True):
w1 = window[0]
if w1 is None:
continue
wfd[w1] += 1
for w2 in window[1:]:
if w2 is not None:
bfd[(w1, w2)] += 1
return cls(wfd, bfd, window_size=window_size)
评论列表
文章目录