def parse_search(self, response):
    """Handle a search-result page and request page 1 of the article index.

    If the response URL contains "antispider" (per the log message, a
    verification-code page was served instead of results), log the event,
    sleep for 12 hours and close the spider. Otherwise take the ``href``
    of the first matching result block, rewrite it into the ``gzhjs`` JSON
    endpoint URL and yield a request for page 1, handled by
    ``self.parse_index``.

    @param response: response to a search request. ``response.meta`` must
        contain ``cookies`` (forwarded to the next request); ``query`` is
        read only for the failure log message.
    @raise CloseSpider: with reason ``'antispider'`` when the response URL
        contains "antispider".
    """
    if "antispider" in response.url:
        spider_logger.error("Closing spider for verification code received in %s ! Spider will restart automatically after 12 hours!" % response.url)
        # NOTE(review): this blocks the calling thread for the whole 12 hours
        # before the spider is closed. Kept as-is because the restart logic
        # implied by the log message is outside this block -- confirm.
        time.sleep(43200)  # 12 hours, in seconds
        raise CloseSpider('antispider')

    hrefs = response.xpath(
        '//div[@class="wx-rb bg-blue wx-rb_v1 _item"][1]/@href').extract()
    if not hrefs:
        spider_logger.error("Faild searching {0} !".format(response.meta['query']))
        return

    # The XPath's [1] is evaluated per parent element, so more than one href
    # may be returned. Use only the first match: concatenating all of them
    # would produce a malformed URL.
    first_href = hrefs[0]

    # Rewrite the relative '/gzh?' link into the JSON endpoint and ask for
    # page 1 with an empty article keyword.
    json_url = first_href.replace('/gzh?', 'http://weixin.sogou.com/gzhjs?') + '&cb=sogou.weixin_gzhcb&page=1&gzhArtKeyWord='

    cookies = response.meta['cookies']
    # Pass the cookies both on the request itself and through meta so the
    # next callback can read them from response.meta.
    yield Request(json_url, callback=self.parse_index, cookies=cookies, meta={'cookies': cookies})
评论列表
文章目录