def parse_url_list(self, response):
    """Yield follow-up requests for an article-list page.

    If the page still contains text inside ``<p id="loading">``, the same
    URL is requested again with ``meta['isscreen'] = 1`` and routed to
    ``self.parse_validate``. Otherwise one request per extracted article
    link is yielded and routed to ``self.parse_item``.

    Args:
        response: The Scrapy response for the list page.

    Yields:
        scrapy.Request: Either the single re-request of ``response.url``
        or one request per non-empty article link.
    """
    sel = scrapy.Selector(response)

    wait_text = sel.xpath("//p[@id='loading']//text()").extract()
    if wait_text:
        # The loading placeholder is still present, so the article list is
        # not in this HTML yet.
        # NOTE(review): 'isscreen' is presumably consumed by a downloader
        # middleware or by parse_validate -- confirm where it is read.
        meta = response.meta
        meta['isscreen'] = 1
        # dont_filter=True because this URL has normally been requested
        # already; without it the duplicate filter may drop the re-request.
        yield scrapy.Request(
            response.url,
            meta=meta,
            callback=self.parse_validate,
            dont_filter=True,
        )
        return

    # The XPath reads an attribute literally named "hrefs" (not "href").
    # NOTE(review): confirm this matches the target page's markup.
    raw_hrefs = sel.xpath("//h4[@class='weui_media_title']/@hrefs").extract()
    for raw_href in raw_hrefs:
        href = raw_href.strip()
        if not href:
            # A blank attribute would otherwise turn into a request for the
            # bare host, which is not an article page.
            continue
        url = 'http://mp.weixin.qq.com%s' % href
        # NOTE(review): this passes self.meta, not response.meta -- confirm
        # the spider defines self.meta and that this is intentional.
        yield scrapy.Request(url, meta=self.meta, callback=self.parse_item)
评论列表
文章目录