def down_allpic(html_file, down_dir=r'D:/download/20170304/', timeout=None):
    """Download every file referenced by a ``data-clipboard-text`` attribute.

    The HTML file is scanned for ``data-clipboard-text="..."`` attributes and
    each captured value is treated as a URL. URLs ending in ``.zip`` or ``/``
    are not downloaded; every other URL is streamed to ``down_dir`` under the
    last component of its URL path.

    Args:
        html_file: Path of a UTF-8 encoded HTML file to scan.
        down_dir: Directory that receives the downloaded files. It is created
            on demand right before the first file is written.
        timeout: Value forwarded to ``requests.get(timeout=...)``. ``None``
            (the default) keeps the previous behaviour of not setting one.

    Returns:
        The list of all extracted URLs, in document order, including the ones
        that were skipped.
    """
    # Local import keeps the block self-contained regardless of file imports.
    from urllib.parse import urlsplit

    id_pattern = re.compile(r' data-clipboard-text="(.*?)"', re.MULTILINE)
    with open(html_file, encoding='utf-8') as f:
        content = f.read()
    # The pattern has exactly one group, so findall yields the URL strings.
    urls = id_pattern.findall(content)
    print(len(urls))

    chunk_size = 1024
    for u in urls:
        # Archives and directory-style links are deliberately not fetched.
        if u.endswith(('.zip', '/')):
            continue
        # Take the name from the URL path only, so that a query string or
        # fragment does not leak into the file name on disk.
        filename = os.path.basename(urlsplit(u).path)
        if not filename:
            # Nothing usable to name the file after.
            continue
        print(u)
        # Stream the body; the context manager releases the connection even
        # if writing the file fails.
        with requests.get(u, stream=True, timeout=timeout) as r:
            if not r.ok:
                # Do not store an HTTP error page as if it were the file.
                print('skipped (HTTP %s): %s' % (r.status_code, u))
                continue
            os.makedirs(down_dir, exist_ok=True)
            # Write the response to disk piece by piece instead of loading
            # the whole body into memory.
            with open(os.path.join(down_dir, filename), 'wb') as fd:
                for chunk in r.iter_content(chunk_size):
                    fd.write(chunk)
    return urls
评论列表
文章目录