def wiki_bio_download(list_file_name, out_dir):
    """Download the English Wikipedia page for every name in a list file.

    The list file is read as UTF-8, one page name per line.  For each name
    the page ``https://en.wikipedia.org/wiki/<name>`` is fetched and saved
    as ``<out_dir>/<name>.html`` (UTF-8).  Names whose output file already
    exists are skipped, so an interrupted run can simply be restarted.

    Args:
        list_file_name: Path of the UTF-8 text file holding the page names.
        out_dir: Directory receiving the ``.html`` files; created up front
            if it does not exist yet.

    Returns:
        None.  Progress messages are written to ``sys.stdout``.

    Raises:
        IOError: If the list file cannot be read or an output file cannot
            be written.
        urllib2.URLError: If a page cannot be fetched (propagated from
            ``urllib2.urlopen``).
    """
    # The target directory does not depend on the name, so check it once
    # instead of on every iteration.
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)

    # ``with`` guarantees the list file is closed even if a download fails.
    with codecs.open(list_file_name, 'r', "utf-8") as name_list:
        for line in name_list:
            # codecs.open does no newline translation, so strip '\r' as well
            # to cope with CRLF list files.
            name = line.rstrip('\r\n')
            if not name:
                # A blank line would otherwise fetch the bare /wiki/ URL and
                # write a file called ".html".
                continue

            outname = out_dir + os.sep + name + ".html"
            if os.path.exists(outname):
                continue

            url = "https://en.wikipedia.org/wiki/" + name
            sys.stdout.write(" Downloading " + name.encode('utf8') + "\n")
            # Percent-encode everything except the scheme/path separators.
            url = urllib2.quote(url.encode('utf8'), ':/')

            response = urllib2.urlopen(url)
            try:
                html = response.read().decode('utf8')
            finally:
                # urllib2 responses are not context managers; close by hand.
                response.close()

            # Closing the file flushes the page to disk before moving on.
            with codecs.open(outname, 'w', "utf-8") as html_out:
                html_out.write(html)

    sys.stdout.write("All files downloaded\n")
评论列表
文章目录