def __init__(self, domains, urls, *args, **kwargs):
    """Constructor for FeedSpider.

    Parameters
    ----------
    domains : list
        A list of domains for the site.
    urls : list
        A list of feed URLs of the site.
    provider : string
        The provider of RSS feed. Defaults to 'self'.
    url_regex : string
        URL pattern regular expression.
    iterator : string
        Which iterator scrapy uses to parse the feed
        ('iternodes', 'xml' or 'html'). Defaults to 'iternodes'.
    itertag : string
        Name of the node to iterate in the feed. Defaults to 'item'.

    If you use this spider to store item into database, additional
    keywords are required:

    platform_id : int
        The id of a platform instance.
    session : object
        An instance of SQLAlchemy session.

    Other keywords are used to specify how to parse the XML, see
    http://doc.scrapy.org/en/latest/topics/spiders.html#scrapy.spiders\
    .XMLFeedSpider.
    """
    self.platform_id = kwargs.pop('platform_id', None)
    self.session = kwargs.pop('session', None)
    self.url_regex = kwargs.pop('url_regex', None)
    self.provider = kwargs.pop('provider', 'self')
    self.iterator = kwargs.pop('iterator', 'iternodes')
    # BUG FIX: was kwargs.pop('iterator', 'item') — 'iterator' had already
    # been popped on the line above, so a caller-supplied 'itertag' was
    # never consumed (it leaked into super().__init__) and self.itertag
    # always fell back to 'item'.
    self.itertag = kwargs.pop('itertag', 'item')
    self.allowed_domains = domains
    self.start_urls = urls
    super(FeedSpider, self).__init__(*args, **kwargs)
# 评论列表 (comment list) / 文章目录 (article table of contents) —
# stray scraped-page artifact text, commented out so the module stays
# syntactically valid Python.