def get_all_category(self, response):
    """Handle the category index page and schedule one request per category.

    The raw response body is first saved to ``<log_dir>/category.html``.
    Every ``href`` matched by ``//table/tbody/tr/td/a`` is then reduced to
    its last ``/``-separated segment, which is used as the tag. Each tag is
    logged, resolved against the response URL, and requested with
    ``get_page_count`` as the callback and ``error_parse`` as the errback.

    Args:
        response: The downloaded category page; it must provide ``body``,
            ``xpath`` and ``urljoin``.

    Yields:
        One ``Request`` per extracted href, carrying the tag and a
        ``download_timeout`` of 20 in its ``meta``.
    """
    dump_path = '%s/category.html' % self.log_dir
    self.write_file(dump_path, response.body)

    # NOTE(review): this XPath only matches tables with an explicit <tbody>
    # element -- confirm the served HTML actually contains one.
    hrefs = response.xpath('//table/tbody/tr/td/a/@href').extract()

    for href in hrefs:
        # Only the final path segment of the link is used as the tag.
        tag = href.rsplit('/', 1)[-1]
        utils.log('tag:%s' % tag)

        meta = {
            'tag': tag,
            'download_timeout': 20,
            # 'is_proxy': False,
        }
        # dont_filter=True: the request is sent even if the URL was seen before.
        yield Request(
            url=response.urljoin(tag),
            headers=self.headers,
            dont_filter=True,
            meta=meta,
            callback=self.get_page_count,
            errback=self.error_parse,
        )
评论列表
文章目录