def get_index(self, prefix):
    """
    Download and decompress the ``warc.paths.gz`` listing of a crawl.

    The crawl is resolved with ``self.select_crawl(prefix)``; the listing
    is then read from ``self.bucket`` in one go and gunzipped in memory.

    :param prefix: str
        Prefix to S3 bucket
    :return: Lines of the uncompressed warc index, each with leading and
        trailing whitespace (including the newline) stripped. Lines are
        whatever ``GzipFile`` yields in binary mode, i.e. byte strings.
    :rtype: list
    """
    crawl = self.select_crawl(prefix)
    # The file name is appended directly, so ``crawl`` presumably already
    # ends with a path separator -- TODO confirm against select_crawl.
    botokey = Key(self.bucket, crawl + 'warc.paths.gz')
    compressed = BytesIO(botokey.read())
    # Close the GzipFile deterministically instead of leaving it to the
    # garbage collector.
    with GzipFile(fileobj=compressed) as index_file:
        return [line.strip() for line in index_file]
评论列表
文章目录