def crawl_detail(self):
    """Fetch and parse the detail page of every entry in ``self.ipo_list``.

    For each entry the URL is built from ``self.detail_url % ipo["code"]``,
    requested with a 3 second timeout, decoded as GBK and handed to
    ``self._parse_detail(html_text, ipo)``. An attempt counts as failed when
    the request raises, the body cannot be read/decoded, or
    ``_parse_detail`` returns a falsy value or raises. Each entry gets at
    most three attempts; a failed entry does not stop the remaining ones.

    Metrics sent through ``attr_report``: 446076 once per entry, 441845 when
    an entry is parsed successfully, 441846 when all attempts failed.
    (The meaning of these ids is defined outside this block.)

    Returns:
        None.
    """
    # Imported locally so the handlers below do not depend on which urllib
    # submodules the file imports at module level. The exception classes live
    # in ``urllib.error``; ``urllib.HTTPError`` / ``urllib.URLError`` do not
    # exist and referencing them raised AttributeError inside the handler.
    from urllib.error import HTTPError, URLError

    max_attempts = 3
    for ipo in self.ipo_list:
        attr_report(446076)
        req = urllib.request.Request(url=self.detail_url % ipo["code"])
        req.add_header('User-agent', user_agent)

        parsed = False
        for _attempt in range(max_attempts):
            try:
                # ``with`` closes the response even if read()/decode() raises.
                with urllib.request.urlopen(req, timeout=3) as resp:
                    html_text = resp.read().decode("gbk")
                if self._parse_detail(html_text, ipo):
                    parsed = True
                    break
            except HTTPError as e:
                # HTTPError subclasses URLError, so it must be caught first.
                logging.warning("server process request error: err_code=%s", e.code)
            except URLError as e:
                logging.warning("reach server error: reason=%s", e.reason)
            except Exception as e:
                # Best-effort crawl: any other failure (socket timeout while
                # reading, decode error, parser exception) is logged and the
                # attempt is retried rather than aborting the whole batch.
                logging.warning('cn craw {0} detail ex:{1}, {2}'.format(ipo["code"], e, traceback.format_exc()))

        if parsed:
            attr_report(441845)
        else:
            attr_report(441846)
            logging.info("cn craw {0} detail fail".format(ipo["code"]))
评论列表
文章目录