def parse(self, response):
    """Scrape the current article page, then follow its "next article" links.

    Yields one CSDNBlogItem whose 'article_name' is the list of UTF-8 encoded
    text nodes matched in the article header and whose 'article_url' is the
    UTF-8 encoded URL of the response. After that, yields one Request per
    "next article" href found on the page, each calling back into this method.
    """
    selector = Selector(response)

    # Build the item for the page that was just fetched.
    item = CSDNBlogItem()
    page_url = str(response.url)
    name_nodes = selector.xpath('//div[@id="article_details"]/div/h1/span/a/text()')
    names = name_nodes.extract()
    item['article_name'] = [name.encode('utf-8') for name in names]
    item['article_url'] = page_url.encode('utf-8')
    yield item

    # Queue the following article(s). Each href is appended to the site root
    # by plain string concatenation.
    for href in selector.xpath('//li[@class="next_article"]/a/@href').extract():
        # Single-argument print in parentheses: same output as `print href`.
        print(href)
        next_url = "http://blog.csdn.net" + href
        print(next_url)
        yield Request(next_url, callback=self.parse)
评论列表
文章目录