def article_to_pairs(arg):
    """Build comparison pairs from the sentences of one article.

    Args:
        arg: A two-item sequence ``(article, direction)``. ``article`` must
            support ``'text' in article`` and ``article['text']``;
            ``direction`` is forwarded unchanged to ``translate``.
            NOTE(review): the single packed argument looks intended for
            ``map``-style dispatch -- confirm against the caller.

    Returns:
        A list holding the concatenated results of ``compare`` for every
        (sentence, translation) pair, or an empty list when the article
        has no ``'text'`` key.
    """
    article, direction = arg
    if 'text' not in article:
        return []

    # NOTE(review): presumably NLTK's sent_tokenize -- confirm the import.
    sents = sent_tokenize(article['text'], language='norwegian')
    translations = translate(sents, direction)

    pairs = []
    # zip() stops at the shorter input, so a sentence without a matching
    # translation is skipped rather than raising.
    for sent, trans in zip(sents, translations):
        # The translation is tokenized first, keeping the original call order.
        trans_tokens = tokenize(trans)
        tokens = tokenize(sent)
        pairs.extend(compare(tokens, trans_tokens))
    # Locals are released on return, so no explicit ``del`` is needed here.
    return pairs
评论列表
文章目录