def triterms(words, join_string):
    """
    Build every order-preserving three-word combination of ``words``.

    Input: a list of words, e.g., ["I", "am", "Denny", "boy"]
    Output: a list of triterms, e.g.,
        ["I_am_Denny", "I_am_boy", "I_Denny_boy", "am_Denny_boy"]
        (shown for join_string == "_").

    Each triterm joins three words taken at strictly increasing positions
    (i < j < k) with ``join_string``. Results are ordered by (i, j, k).
    Duplicate words are treated as distinct positions, so repeated triterms
    are possible.

    When ``words`` has fewer than three items, the call is delegated to
    ``NgramUtil.biterms(words, join_string)`` and its result is returned
    unchanged.

    Raises AssertionError if ``words`` is not a list.
    """
    # Function-scope import so the block carries its own dependency.
    from itertools import combinations

    assert isinstance(words, list)
    if len(words) > 2:
        # combinations() emits triples in lexicographic index order, which is
        # exactly the order the explicit i/j/k loops would produce, and it
        # works on both Python 2 and Python 3 (no xrange).
        return [join_string.join(triple) for triple in combinations(words, 3)]
    # Too few words for a triterm: fall back to biterms.
    return NgramUtil.biterms(words, join_string)
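# 评论列表 (comment list) -- web-page navigation text, not part of the code
# 文章目录 (table of contents) -- web-page navigation text, not part of the code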