def _charset_from_content_type(content_type, default='utf-8'):
    """Return the ``charset`` parameter of a Content-Type value, or *default*."""
    # Parameters follow the media type, separated by ';' (e.g.
    # 'text/html; charset=UTF-8; foo=bar'); the value may be quoted.
    for parameter in (content_type or '').split(';')[1:]:
        name, _, value = parameter.partition('=')
        if name.strip().lower() == 'charset':
            value = value.strip().strip('"\'')
            if value:
                return value
    return default


def URL_to_HTML(URL):
    """Fetch *URL* and return its body decoded to text.

    The body is decoded with the charset declared in the response's
    Content-Type header. When the header is missing or declares no
    charset, UTF-8 is assumed.

    Args:
        URL: Address passed unchanged to ``urllib2.urlopen``.

    Returns:
        The decoded body, or ``None`` when the request fails with an
        ``HTTPError``, ``URLError``, ``BadStatusLine`` (each reported on
        stdout) or a connection reset (not reported).

    Raises:
        SocketError: For socket failures other than ``ECONNRESET``.
        UnicodeDecodeError, LookupError: If the body cannot be decoded
            with the resolved charset.
    """
    try:
        response = urllib2.urlopen(URL)
    except urllib2.HTTPError as error:
        print(u'HTTPError: {0} ({1})'.format(URL, error.code))
    except urllib2.URLError as error:
        print(u'URLError: {0} ({1})'.format(URL, error.reason))
    except httplib.BadStatusLine:
        print(u'BadStatusLine: {}'.format(URL))
    except SocketError as error:
        # A reset connection is deliberately tolerated without a message;
        # every other socket failure propagates to the caller.
        if error.errno != errno.ECONNRESET:
            raise
    else:
        # Close the response even when reading or decoding fails.
        try:
            charset = _charset_from_content_type(
                response.headers.get('content-type'))
            return response.read().decode(charset)
        finally:
            response.close()
    return None
Wikipedia_Crawler.py 文件源码
python
阅读 23
收藏 0
点赞 0
评论 0
评论列表
文章目录