def read_texts(path: str, source_type: FileType) -> Iterator[str]:
    """
    Lazily yield the texts stored in the file(s) resolved from ``path``.

    Every path returned by ``Reader.get_paths`` is opened as UTF-8 text and
    decoded according to ``source_type``:

    * ``FileType.XML``  -- the ``.text`` of the element located by
      ``find(".//text")`` inside each element produced by
      ``Reader.__xml_iter(file, 'item')``;
    * ``FileType.JSON`` -- the ``'text'`` value of every entry in the
      document's top-level ``'items'`` collection;
    * ``FileType.RAW``  -- the pieces of the file content split on
      ``RAW_SEPARATOR``.

    Any other ``source_type`` yields nothing for that file.

    :param path: location handed to ``Reader.get_paths`` (presumably a file
        or a directory -- confirm against that helper).
    :param source_type: kind of data to read; its ``value`` is also passed
        to ``Reader.get_paths``.
    :return: iterator over the extracted texts.
    """
    for file_path in Reader.get_paths(path, source_type.value):
        with open(file_path, "r", encoding="utf-8") as handle:
            if source_type == FileType.XML:
                for node in Reader.__xml_iter(handle, 'item'):
                    text_node = node.find(".//text")
                    yield text_node.text
            elif source_type == FileType.JSON:
                # TODO: the original note here was lost to mis-encoding;
                # recover its intent. Note that json.load reads the whole
                # document into memory before anything is yielded.
                payload = json.load(handle)
                yield from (entry['text'] for entry in payload['items'])
            elif source_type == FileType.RAW:
                content = handle.read()
                yield from content.split(RAW_SEPARATOR)
# Web-page residue, not part of the code: "评论列表" (comment list), "文章目录" (table of contents).