def show(self, response):
# print(response)
hxs = HtmlXPathSelector(response)
news_list = hxs.select('//div[@id="content-list"]/div[@class="item"]')
for new in news_list:
# temp = new.xpath('div/div[@class="part2"]/@share-linkid').extract()
link_id = new.xpath('*/div[@class="part2"]/@share-linkid').extract_first()
yield Request(
url='http://dig.chouti.com/link/vote?linksId=%s' % (link_id,),
method='POST',
cookies=self.cookie_dict,
callback=self.do_favor
)
page_list = hxs.select('//div[@id="dig_lcpage"]//a[re:test(@href, "/all/hot/recent/\d+")]/@href').extract()
for page in page_list:
page_url = 'http://dig.chouti.com%s' % page
import hashlib
hash = hashlib.md5()
hash.update(bytes(page_url, encoding='utf-8'))
key = hash.hexdigest()
if key in self.has_request_set:
pass
else:
self.has_request_set[key] = page_url
yield Request(
url=page_url,
method='GET',
callback=self.show
)
评论列表
文章目录