def parse_node(self, response, node):
    """Build a feed entry from one feed node and request its article page.

    Reads the publication date, author, categories, title and link from
    ``node``, stores them in a ``FeedEntryItemLoader`` and returns a
    ``scrapy.Request`` for the entry's link. The partially filled loader
    is handed to ``self._parse_article`` via ``meta['il']``.

    :param response: response the node was taken from; passed to the loader.
    :param node: selector for the feed entry being processed.
    :returns: a ``scrapy.Request`` for the entry link.
    """
    # NOTE(review): every XPath below is absolute ('//...'), so it searches
    # the whole document the selector belongs to. This presumably relies on
    # each node being parsed as its own fragment -- confirm against the
    # spider's iterator setting.
    il = FeedEntryItemLoader(response=response,
                             base_url='http://{}'.format(self.name),
                             dayfirst=True)
    il.add_value('updated', node.xpath('//pubDate/text()').extract_first())

    # extract_first() returns None when the element is missing and
    # html.unescape() raises TypeError on None, so only add the author
    # when one is actually present.
    author = node.xpath('//dc:creator/text()').extract_first()
    if author is not None:
        il.add_value('author_name', html.unescape(author))

    categories = node.xpath('//category/text()').extract()
    for category in categories:
        il.add_value('category', html.unescape(category))

    # extract() yields a list of strings. html.unescape() must be applied to
    # each string: handed the list itself it returns the list unchanged
    # (its "'&' not in s" check becomes an element-membership test), so
    # entities in the title would never be decoded.
    titles = node.xpath('(//title)[2]/text()').extract()
    if not titles and categories:
        # Fallback to the first category if no title is provided
        # (e.g. comic).
        titles = [categories[0]]
    il.add_value('title', [html.unescape(title) for title in titles])

    # NOTE(review): link is None when the node has no second <link>;
    # scrapy.Request is then expected to reject it. Confirm whether such
    # entries should be skipped instead.
    link = node.xpath('(//link)[2]/text()').extract_first()
    il.add_value('link', link)
    return scrapy.Request(link, self._parse_article, meta={'il': il})
评论列表
文章目录