def build_pos_ngrams(tagged, low, high):
    """Build part-of-speech n-grams for every order from ``low`` to ``high``.

    Args:
        tagged: Iterable of ``(word, pos)`` pairs.
        low: Smallest n-gram order to build; must be greater than 0.
        high: Largest n-gram order to build (inclusive); must be >= ``low``.

    Returns:
        A ``(grams, pos_words)`` tuple. ``grams`` is a dict mapping each
        order ``n`` in ``low..high`` to the list of items yielded by
        ``ngrams(pos_tokens, n)``, where ``pos_tokens`` is the sequence of
        POS tags in input order. ``pos_words`` is a ``defaultdict(list)``
        mapping each POS tag to the words seen with that tag, in input
        order.

    Raises:
        AssertionError: If ``low > high`` or ``low <= 0`` (these checks are
            plain ``assert`` statements, so they are skipped under ``-O``).
    """
    # Pass the values as logging arguments so the message is only formatted
    # when DEBUG output is actually enabled.
    LOGGER.debug("Building POS ngrams from %d to %d", low, high)
    assert low <= high
    assert low > 0

    pos_tokens = []
    pos_words = defaultdict(list)
    for word, pos in tagged:
        pos_tokens.append(pos)
        pos_words[pos].append(word)

    # One entry per n-gram order; materialize each result into a list.
    grams = {n: list(ngrams(pos_tokens, n)) for n in range(low, high + 1)}
    return grams, pos_words
# Comment list
# Table of contents