def read_lett_iter(f, decode=True):
    """Yield one ``Page`` per record of a tab-separated "lett" file.

    Each line must hold exactly six tab-separated fields, in this order:
    ``lang, mime, enc, url, html, text``.  The ``html`` and ``text`` fields
    are base64-encoded and are decoded before the ``Page`` is built.

    Args:
        f: Seekable file object.  It is rewound to offset 0 before reading.
            If it has a ``name`` ending in ``.gz``, its content is
            decompressed with gzip while iterating.
        decode: When true (the default), the base64-decoded ``html`` and
            ``text`` payloads are additionally decoded from UTF-8 bytes;
            otherwise they are left as raw bytes.

    Yields:
        ``Page(url, html, text, mime, enc, lang)`` for every line.  The four
        metadata fields are passed through exactly as read, so they are
        bytes whenever the underlying stream yields bytes (e.g. gzip).

    Raises:
        ValueError: If a line does not split into exactly six fields.
        UnicodeDecodeError: If ``decode`` is true and a payload is not
            valid UTF-8.
    """
    fh = f
    fh.seek(0)

    # In-memory streams may have no ``name`` and fd-backed files have an
    # integer one; neither can be a gzip path, so treat them as plain input.
    name = getattr(f, "name", "")
    if hasattr(name, "endswith") and name.endswith(".gz"):
        fh = gzip.GzipFile(fileobj=fh, mode="r")

    for line in fh:
        # Binary streams (gzip, files opened with "rb") yield bytes while
        # text streams yield str; the separators must be of the same type.
        if isinstance(line, bytes):
            line_end, tab = b"\r\n", b"\t"
        else:
            line_end, tab = "\r\n", "\t"

        # Strip only the line terminator.  Blindly dropping the last
        # character would truncate the base64 payload of a final record
        # that is not newline-terminated.
        record = line.rstrip(line_end)
        lang, mime, enc, url, html, text = record.split(tab)

        html = base64.b64decode(html)
        text = base64.b64decode(text)
        if decode:
            html = html.decode("utf-8")
            text = text.decode("utf-8")

        yield Page(url, html, text, mime, enc, lang)
lett.py 文件源码
python
阅读 17
收藏 0
点赞 0
评论 0
评论列表
文章目录