def parse_index(self, response):
    """
    @summary: Parse a search result (index) page and build the follow-up Request objects.
    @param response: the response returned for a request generated by parse_search().
    @return: a list of Requests for the detail-page URLs, plus a Request for the next
             result page when pagination should continue.
    """
    if "antispider" in response.url:
        spider_logger.error("Closing spider for verification code received in %s! "
                            "Spider will restart automatically after 12 hours!" % response.url)
        time.sleep(43200)  # block for 12 hours before closing the spider
        raise CloseSpider('antispider')
    requests = []
    page_list = self._get_result(response)
    # Nothing could be parsed from this page: return an empty request list.
    if not page_list:
        return requests
    next_page = True  # whether the next result page should still be crawled
    # Walk through the parsed entries and build a detail-page Request for each valid one.
    for item in page_list:
        if isinstance(item, Request):  # the parser returned a Request directly: forward it and stop
            requests.append(item)
            next_page = False
            break
        if item['publish_time'] <= self.from_time:  # published before self.from_time: stop paginating
            next_page = False
            break
        elif item['publish_time'] > self.end_time:  # published after self.end_time: skip this entry
            continue
        else:
            req = Request(item['url'], self.parse_page)
            # Pass the partially filled item to the detail-page callback via meta.
            req.meta["item"] = item
            requests.append(req)
    # If pagination should continue and a next page exists, request it; otherwise stop here.
    next_url = self._next_result_page(response)
    if next_page and next_url:
        cookies = response.meta['cookies']
        requests.append(Request(next_url, callback=self.parse_index,
                                cookies=cookies, meta={'cookies': cookies}))
    return requests
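
# The helpers used above, _get_result() and _next_result_page(), are defined elsewhere in the
# spider. A minimal, hypothetical sketch of _next_result_page() is shown below only to
# illustrate the contract parse_index() relies on: it should return the URL of the next result
# page, or None when there is no further page. The CSS selector is a placeholder and depends
# on the actual markup of the site being crawled.
def _next_result_page(self, response):
    # Placeholder selector; adjust to the real "next page" link of the target site.
    next_href = response.css('a.next::attr(href)').get()
    return response.urljoin(next_href) if next_href else None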