def parse(self, response):
    """Parse one listing page: yield an item per entry, then maybe the next page.

    At most the first 20 ``<li>`` entries under ``ul.lby-list`` are turned
    into ``SiteItem`` objects.  ``title`` is always set (possibly to
    ``None``); ``pubtime`` and ``link`` are set only when the entry provides
    them.  After the entries, the next page is requested only when the last
    publication date seen on this page equals ``date.get_curdate()`` and the
    page actually has a ``next_page`` link.

    :param response: the listing-page response to extract entries from.
    :returns: a generator of ``SiteItem`` objects, optionally followed by
        one ``scrapy.FormRequest`` whose callback is this method.
    """
    base_url = "http://www.zycg.gov.cn"
    pubtime = None

    for temp in response.xpath('//ul[@class="lby-list"]//li')[:20]:
        item = SiteItem()

        # extract_first() returns None when the entry has no <span> text,
        # so guard before calling .strip() instead of crashing the page.
        raw_pubtime = temp.xpath('span/text()').extract_first()
        # Keep characters 1..10 of the stripped text -- presumably the date
        # following a leading bracket; TODO confirm against the live markup.
        temp_pubtime = raw_pubtime.strip()[1:11] if raw_pubtime else ''
        if temp_pubtime:
            item['pubtime'] = temp_pubtime
            pubtime = temp_pubtime

        item['title'] = temp.xpath('a//text()').extract_first()
        # Debug trace through the spider logger rather than a bare print,
        # so it obeys the crawler's log level and works on Python 2 and 3.
        self.logger.debug("------------------------------%s----", item['title'])

        # Look the href up once and reuse it for both the check and the link.
        href = temp.xpath('a/@href').extract_first()
        if href:
            item['link'] = base_url + href
        yield item

    # Only keep paginating while the last dated entry matches the value
    # returned by date.get_curdate().
    if pubtime == date.get_curdate():
        next_page_href = response.xpath(
            '//a[@class="next_page"]//@href').extract_first()
        # Without this guard a missing link became the URL "...cnNone".
        if next_page_href:
            yield scrapy.FormRequest(base_url + next_page_href,
                                     callback=self.parse)
评论列表
文章目录