def spider_closed(self, spider):
    self.exporter.finish_exporting()
    file_json_lines = self.files.pop(spider)
    file_json_lines.close()
def from_crawler(cls, crawler):
    pipeline = cls()
    crawler.signals.connect(pipeline.spider_closed, signals.spider_closed)  # fired when the spider is closed
    return pipeline
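The fragments above and below are example usages of spider_closed() taken from Scrapy item pipelines that hook the spider_opened and spider_closed signals around an item exporter. For orientation, here is a minimal self-contained sketch of that pattern; the class name JsonLinesExportPipeline, the JsonLinesItemExporter choice, and the '%s_items.jl' filename are illustrative assumptions, not code from the projects quoted here.

from scrapy import signals
from scrapy.exporters import JsonLinesItemExporter


class JsonLinesExportPipeline(object):
    """Sketch: export items to one JSON-lines file per spider."""

    def __init__(self):
        self.files = {}

    @classmethod
    def from_crawler(cls, crawler):
        pipeline = cls()
        crawler.signals.connect(pipeline.spider_opened, signals.spider_opened)
        crawler.signals.connect(pipeline.spider_closed, signals.spider_closed)
        return pipeline

    def spider_opened(self, spider):
        # Open the output file and start the exporter when the spider starts.
        f = open('%s_items.jl' % spider.name, 'wb')
        self.files[spider] = f
        self.exporter = JsonLinesItemExporter(f)
        self.exporter.start_exporting()

    def spider_closed(self, spider):
        # Mirror image of spider_opened: finish the export and close the file.
        self.exporter.finish_exporting()
        f = self.files.pop(spider)
        f.close()

    def process_item(self, item, spider):
        self.exporter.export_item(item)
        return item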
def spider_closed(self, spider, reason):
    """
    Called when the spider is closed; dump the collected crawl statistics.
    :param spider:
    :param reason: finished/cancelled/shutdown
    :return:
    """
    print(time.strftime("%Y-%m-%d %H:%M:%S"), 'StatsPipeline Signals: spider_closed')
    print(spider.crawler.stats.get_stats())
    print(spider.crawler.stats.get_value('downloader/request_count', 0))    # requests sent
    print(spider.crawler.stats.get_value('downloader/response_count', 0))   # responses downloaded
    print(spider.crawler.stats.get_value('response_received_count', 0))     # responses received
    print(spider.crawler.stats.get_value('item_dropped_count', 0))          # items dropped
    print(spider.crawler.stats.get_value('item_scraped_count', 0))          # items scraped
def from_crawler(cls, crawler):
    pipeline = cls()
    crawler.signals.connect(pipeline.engine_started, signals.engine_started)            # engine started
    crawler.signals.connect(pipeline.engine_stopped, signals.engine_stopped)            # engine stopped
    crawler.signals.connect(pipeline.item_scraped, signals.item_scraped)                # item passed all pipelines
    crawler.signals.connect(pipeline.item_dropped, signals.item_dropped)                # item dropped (DropItem raised)
    crawler.signals.connect(pipeline.spider_opened, signals.spider_opened)              # spider opened
    crawler.signals.connect(pipeline.spider_closed, signals.spider_closed)              # spider closed
    crawler.signals.connect(pipeline.spider_idle, signals.spider_idle)                  # spider went idle
    crawler.signals.connect(pipeline.spider_error, signals.spider_error)                # exception in a spider callback
    crawler.signals.connect(pipeline.request_scheduled, signals.request_scheduled)      # request scheduled
    crawler.signals.connect(pipeline.request_dropped, signals.request_dropped)          # request rejected by the scheduler
    crawler.signals.connect(pipeline.response_received, signals.response_received)      # response received
    crawler.signals.connect(pipeline.response_downloaded, signals.response_downloaded)  # raw response downloaded
    return pipeline
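Each handler connected above is called with a different set of keyword arguments. The signatures below follow the argument lists documented for Scrapy's built-in signals; the method bodies are placeholders only, not the logging logic of the original pipeline.

class SignalHandlersSketch(object):
    # Signatures match Scrapy's documented signal arguments.
    def engine_started(self):
        pass
    def engine_stopped(self):
        pass
    def item_scraped(self, item, response, spider):
        pass
    def item_dropped(self, item, response, exception, spider):
        pass
    def spider_opened(self, spider):
        pass
    def spider_closed(self, spider, reason):
        pass
    def spider_idle(self, spider):
        pass
    def spider_error(self, failure, response, spider):
        pass
    def request_scheduled(self, request, spider):
        pass
    def request_dropped(self, request, spider):
        pass
    def response_received(self, response, request, spider):
        pass
    def response_downloaded(self, response, request, spider):
        pass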
def from_crawler(cls, crawler):
    pipeline = cls()
    crawler.signals.connect(pipeline.spider_opened, signals.spider_opened)
    crawler.signals.connect(pipeline.spider_closed, signals.spider_closed)
    return pipeline
def spider_closed(self, spider):
    self.exporter.finish_exporting()
    file_csv = self.files.pop(spider)
    file_csv.close()
def from_crawler(cls, crawler):
    pipeline = cls()
    crawler.signals.connect(pipeline.spider_opened, signals.spider_opened)
    crawler.signals.connect(pipeline.spider_closed, signals.spider_closed)
    return pipeline
def spider_closed(self, spider):
    self.exporter.finish_exporting()
    file_xml = self.files.pop(spider)
    file_xml.close()
def from_crawler(cls, crawler):
    pipeline = cls()
    crawler.signals.connect(pipeline.spider_opened, signals.spider_opened)
    crawler.signals.connect(pipeline.spider_closed, signals.spider_closed)
    return pipeline
def __init__(self, *args, **kwargs):
    super(FullDomainSpider, self).__init__(*args, **kwargs)
    self.allowed_domains = kwargs.get('allowed_domains').split(',')
    self.org = kwargs.get('org')
    self.start_urls = kwargs.get('start_urls').split(',')
    # Older pydispatch-style hookup; newer code connects these handlers
    # through crawler.signals in from_crawler instead.
    dispatcher.connect(self.spider_opened, signals.spider_opened)
    dispatcher.connect(self.spider_closed, signals.spider_closed)
def spider_closed(self, spider):
    try:
        self.conn.close()
    except Exception:
        log.msg("Could not close database connection", level=log.ERROR)
def from_crawler(cls, crawler):
    pipeline = cls()
    crawler.signals.connect(pipeline.spider_opened, signals.spider_opened)
    crawler.signals.connect(pipeline.spider_closed, signals.spider_closed)
    return pipeline
def spider_closed(self, spider):
    self.exporters[spider].finish_exporting()
    file = self.files.pop(spider)
    file.close()
def __exit__(self, exc_type, exc_val, exc_tb):
    # send_catch_log fires spider_closed and returns (receiver, result)
    # pairs; a handler that raised is reported as a twisted Failure,
    # which is re-raised here.
    responses = self.crawler.signals.send_catch_log(signal=signals.spider_closed,
                                                    spider=self.spider, reason=None)
    for _, failure in responses:
        if failure:
            failure.raiseException()
def from_crawler(cls, crawler):
    pipeline = cls()
    crawler.signals.connect(pipeline.spider_opened, signals.spider_opened)
    crawler.signals.connect(pipeline.spider_closed, signals.spider_closed)
    return pipeline
def spider_closed(self, spider):
    self.exporter.finish_exporting()
    file = self.files.pop(spider)
    file.close()
def process_item(self, item, spider):
    self.count += 1
    if self.count > 1000:
        # Rotate the output file every 1000 items: reset the counter,
        # bump the file index, then close and reopen the exporter.
        self.count = 0
        self.file_count += 1
        self.spider_closed()
        self.spider_opened()
    self.exporter.export_item(item)
    return item
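The rotation above relies on spider_opened and spider_closed being callable with no arguments, in addition to being invoked by the signals. A hedged sketch of what that pair might look like in such a rotating pipeline follows; the numbered filename pattern and the use of JsonLinesItemExporter are assumptions, not code from the original project.

from scrapy.exporters import JsonLinesItemExporter

def spider_opened(self, spider=None):
    # spider defaults to None so process_item can call this directly when
    # rotating, as well as via the spider_opened signal.
    self.file = open('items_%05d.jl' % self.file_count, 'wb')  # assumed filename pattern
    self.exporter = JsonLinesItemExporter(self.file)
    self.exporter.start_exporting()

def spider_closed(self, spider=None):
    self.exporter.finish_exporting()
    self.file.close()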
def from_crawler(cls, crawler):
    pipeline = cls()
    crawler.signals.connect(pipeline.spider_opened, signals.spider_opened)
    crawler.signals.connect(pipeline.spider_closed, signals.spider_closed)
    return pipeline
def spider_closed(self, spider):
    self.exporter.finish_exporting()
    file = self.files.pop(spider)
    file.close()
def from_crawler(cls, crawler):
    pipeline = cls()
    crawler.signals.connect(pipeline.spider_opened, signals.spider_opened)
    crawler.signals.connect(pipeline.spider_closed, signals.spider_closed)
    return pipeline