def map_coocurence(context_size, data):
    """Build distance-weighted co-occurrence pairs for one piece of text.

    The text is processed only when ``detect(data)`` reports ``'en'``. It is
    then tokenized with ``nltk.word_tokenize`` and walked with
    ``_context_windows``, which yields ``(left_context, word, right_context)``
    triples using ``context_size`` for both sides.

    Args:
        context_size: Window size passed to ``_context_windows`` for both the
            left and the right context.
        data: The raw text to process.

    Returns:
        A list of ``((word, context_word), weight)`` tuples, where ``weight``
        is ``1.0 / (i + 1)`` and ``i`` is the 0-based position of
        ``context_word`` within the (reversed) left context or the right
        context. Only pairs where both tokens satisfy ``isWord`` are emitted.
        The list is empty when the language is not ``'en'`` or when language
        detection raises ``LangDetectException``.
    """
    # Only the language detection is guarded by the handler, so the handler
    # cannot mask an unrelated failure in tokenizing or pairing.
    try:
        language = detect(data)
    except LangDetectException:
        return []
    if language != 'en':
        return []

    coocurrence_list = []
    region = nltk.word_tokenize(data)
    for l_context, word, r_context in _context_windows(region, context_size, context_size):
        if not isWord(word):
            continue
        # The left context is reversed so that index 0 is the first element
        # walked on each side (presumably the token nearest to `word` --
        # verify against _context_windows). Left pairs are emitted before
        # right pairs, as in the original.
        for context in (l_context[::-1], r_context):
            for i, context_word in enumerate(context):
                if isWord(context_word):
                    # 1.0 forces true division, so the weight does not
                    # truncate to 0 under Python 2 integer division.
                    coocurrence_list.append(((word, context_word), 1.0 / (i + 1)))
    return coocurrence_list
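# 评论列表 (comment list) -- web-page navigation residue, not part of the program
# 文章目录 (article table of contents) -- web-page navigation residue, not part of the program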