from typing import List, Tuple

# Assumes the Janome morphological analyzer, whose Tokenizer yields
# tokens with a .surface attribute, matching the usage below.
from janome.tokenizer import Tokenizer


def tokenize(sentences: List[str]) -> Tuple[List[int], List[str]]:
    tokenizer = Tokenizer()
    lengths = []
    texts = []
    for s in sentences:
        # Tokenize the sentence and keep each token's surface form.
        result = tokenizer.tokenize(s)
        surfaces = [t.surface for t in result]
        lengths.append(len(surfaces))
        # Re-join the surface forms into one space-separated string.
        text = ' '.join(surfaces)
        texts.append(text)
    return lengths, texts
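
A minimal usage sketch, assuming Janome's default dictionary; the sample sentence and the printed token counts are illustrative only:

lengths, texts = tokenize(['すもももももももものうち'])
print(lengths)  # e.g. [7] with the default dictionary
print(texts)    # e.g. ['すもも も もも も もも の うち']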