def trigrams(words, join_string, skip=0):
    """Build (skip-)trigrams from a list of words.

    Example: ``trigrams(["I", "am", "Denny"], "_")`` returns
    ``["I_am_Denny"]``.

    Args:
        words: A list of words (a ``list`` or a subclass of it).
        join_string: Separator placed between the three words of a trigram.
        skip: Maximum number of words that may be skipped between two
            consecutive members of a trigram; each step between members is
            1 to ``skip + 1`` positions. ``0`` yields contiguous trigrams only.

    Returns:
        A list of joined trigrams, ordered by start index, then by the first
        step, then by the second step. When ``words`` has fewer than three
        items, the result of ``NgramUtil.bigrams(words, join_string, skip)``
        is returned instead.

    Raises:
        AssertionError: If ``words`` is not a list.
    """
    # isinstance (rather than an exact type comparison) also admits list
    # subclasses, which behave identically for the indexing done below.
    assert isinstance(words, list), "words must be a list"

    n_words = len(words)
    if n_words <= 2:
        # Too short to form a trigram: fall back to bigrams.
        return NgramUtil.bigrams(words, join_string, skip)

    steps = range(1, skip + 2)  # loop-invariant, shared by both step loops
    result = []
    for i in range(n_words - 2):
        for k1 in steps:
            for k2 in steps:
                # k2 >= 1, so this single bound also guarantees
                # i + k1 < n_words.
                if i + k1 + k2 < n_words:
                    result.append(
                        join_string.join(
                            (words[i], words[i + k1], words[i + k1 + k2])
                        )
                    )
    return result
评论列表
文章目录