uebermedien_de.py 文件源码-python代码片段

uebermedien_de.py 文件源码

python

阅读 26 收藏 0 点赞 0 评论 0

def parse_node(self, response, node):
        """Start a feed entry from one feed node and request its article.

        Creates a ``FeedEntryItemLoader`` for ``response`` and fills it with
        the ``updated``, ``author_name``, ``category``, ``title`` and ``link``
        values read from ``node`` via XPath.  HTML entities in the author,
        category and title texts are decoded with ``html.unescape``.

        Args:
            response: Response handed to the item loader.
            node: Selector for the feed node being processed.

        Returns:
            A ``scrapy.Request`` for the entry link with
            ``self._parse_article`` as callback; the partially filled loader
            is passed along in ``meta['il']``.
        """
        il = FeedEntryItemLoader(response=response,
                                 base_url='http://{}'.format(self.name),
                                 dayfirst=True)
        il.add_value('updated', node.xpath('//pubDate/text()').extract_first())

        # extract_first() yields None when the element is missing and
        # html.unescape() raises TypeError on None, so only decode real text.
        author = node.xpath('//dc:creator/text()').extract_first()
        if author is not None:
            il.add_value('author_name', html.unescape(author))

        categories = node.xpath('//category/text()').extract()
        for category in categories:
            il.add_value('category', html.unescape(category))

        # Fetch a single string (not the list returned by extract()):
        # html.unescape() works on str, so a list would never be decoded.
        # NOTE(review): the [2] index presumably skips a feed-level <title>
        # (and <link> below) -- confirm against the actual feed.
        title = node.xpath('(//title)[2]/text()').extract_first()
        if not title and categories:
            # Fallback to the first category if no title is provided
            # (e.g. comic).
            title = categories[0]
        if title:
            il.add_value('title', html.unescape(title))

        link = node.xpath('(//link)[2]/text()').extract_first()
        il.add_value('link', link)
        return scrapy.Request(link, self._parse_article, meta={'il': il})