def read_block(self, stream):
block = []
for para_str in self._para_block_reader(stream):
para = []
for sent_str in self._sent_tokenizer.tokenize(para_str):
sent = self._str2chunktree(sent_str, source_tagset=self._source_tagset,
target_tagset=self._target_tagset)
# If requested, throw away the tags.
if not self._tagged:
sent = self._untag(sent)
# If requested, throw away the chunks.
if not self._chunked:
sent = sent.leaves()
# Add the sentence to `para`.
if self._group_by_sent:
para.append(sent)
else:
para.extend(sent)
# Add the paragraph to `block`.
if self._group_by_para:
block.append(para)
else:
block.extend(para)
# Return the block
return block
chunked.py 文件源码
python
阅读 17
收藏 0
点赞 0
评论 0
评论列表
文章目录