Python example source code for get_project_settings()
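
All of the snippets below revolve around the same call: get_project_settings() locates the active Scrapy project's settings.py (via scrapy.cfg or the SCRAPY_SETTINGS_MODULE environment variable) and returns a Settings object. A minimal, self-contained sketch of the usual pattern follows; DemoSpider is a placeholder spider and is not taken from any of the projects listed below.

# Minimal sketch: load the project settings and run a crawl with them.
# DemoSpider is a placeholder, not part of any project listed below.
import scrapy
from scrapy.crawler import CrawlerProcess
from scrapy.utils.project import get_project_settings


class DemoSpider(scrapy.Spider):
    name = 'demo'
    start_urls = ['http://example.com']

    def parse(self, response):
        yield {'title': response.css('title::text').get()}


settings = get_project_settings()      # Settings object built from the project's settings.py
print(settings.get('BOT_NAME'))        # single values via get()/getint()/getdict()

process = CrawlerProcess(settings)     # the settings apply to every crawler started here
process.crawl(DemoSpider)
process.start()                        # blocks until crawling is finished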

mysql.py (project: scrappy, author: DormyMo)
def __init__(self):
        self.settings = project.get_project_settings()  # get settings
        self.MYSQL_HOST = self.settings.get('MYSQL_HOST')
        self.MYSQL_PORT = self.settings.getint('MYSQL_PORT')
        self.MYSQL_USER = self.settings.get('MYSQL_USER')
        self.MYSQL_PASSWD = self.settings.get('MYSQL_PASSWD')
        self.MYSQL_DB = self.settings.get('MYSQL_DB')
        self._conn()
mongo.py (project: scrappy, author: DormyMo)
def __init__(self, collection_name):
        self.settings = project.get_project_settings()  # get settings
        self.MONGO_URL = self.settings.get("MONGO_URL", "localhost")
        self.client = MongoClient(
            host=self.MONGO_URL, tz_aware=True)
        self.db = self.client['crawl_db']
        self.posts = self.db[collection_name]
lagouSpider.py (project: lagouwang, author: whaike)
def __init__(self):
        print 'preparing ------------------'
        self.start_page = 1
        self.modelUrl = 'https://www.lagou.com/jobs/'
        self.DBK = get_project_settings().getdict('DBK')  # read the DBK database config dict from settings
        hp.NEWHTTPS()  # fetch a fresh batch of HTTPS proxy IPs
        self.oldPages = self.getOldpages()  # pages that have already been crawled
pipelines.py (project: lagouwang, author: whaike)
def __init__(self):
        self.DBK = get_project_settings().getdict('DBK')
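
Both lagouwang snippets pull a dict called DBK out of the settings with getdict(). The actual keys are not shown in the source; a hypothetical settings.py entry of that shape, assuming a MySQL-style connection dict, could look like the following (getdict() would also accept the same dict written as a JSON string):

# Hypothetical settings.py entry; the real DBK keys are not shown in the source.
DBK = {
    'host': '127.0.0.1',
    'port': 3306,
    'user': 'root',
    'passwd': 'secret',
    'db': 'lagou',
    'charset': 'utf8',
}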
cli.py (project: feeds, author: nblock)
def list():
    """List all available spiders."""
    settings = get_project_settings()
    settings['LOG_ENABLED'] = False
    process = CrawlerProcess(settings)
    for s in sorted(process.spider_loader.list()):
        print(s)
crawlall.py (project: ProxyPool, author: Time1ess)
def run(self, args, opts):
        conn = redis.Redis(decode_responses=True)
        runner = CrawlerRunner(get_project_settings())
        try:
            rules = Rule.loads()
            if not rules:
                raise ValueError
        except ValueError:
            print('Error in loading Redis rules, fallback to CSV rules')
            rules = Rule.loads('csv')
        for rule in rules:
            rule.save()
            if rule.name in self.excludes:
                continue
            if conn.hget('Rule:' + rule.name, 'status') == 'started':
                d = runner.crawl(ProxySpider, rule)
                # Set status to stopped if crawler finished
                d.addBoth(lambda _: conn.hset(
                    'Rule:' + rule.name, 'status', 'finished'))
        rule_maintainer = RuleMaintainer(conn, runner)
        proxy_maintainer = ProxyMaintainer(conn)
        schedule_maintainer = ScheduleMaintainer(conn)
        lc = task.LoopingCall(rule_maintainer)
        lc.start(1)
        lc = task.LoopingCall(proxy_maintainer)
        lc.start(0.5)
        lc = task.LoopingCall(schedule_maintainer)
        lc.start(10)
        reactor.run()
dbhelper.py (project: dianping, author: bsns)
def __init__(self):
        self.settings = get_project_settings()  # read the database connection settings from settings.py

        self.host = self.settings['MYSQL_HOST']
        self.port = self.settings['MYSQL_PORT']
        self.user = self.settings['MYSQL_USER']
        self.passwd = self.settings['MYSQL_PASSWD']
        self.db = self.settings['MYSQL_DBNAME']
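
The dict-style access above (settings['MYSQL_HOST'] and so on) simply reads module-level variables from the project's settings.py. A hypothetical fragment matching those key names, with placeholder values, would be:

# Hypothetical settings.py fragment; only the key names come from the snippet above.
MYSQL_HOST = '127.0.0.1'
MYSQL_PORT = 3306
MYSQL_USER = 'root'
MYSQL_PASSWD = 'secret'
MYSQL_DBNAME = 'dianping'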
run.py (project: decoration-design-crawler, author: imflyn)
def __init__(self):
        self.is_running = False
        dispatcher.connect(self.pause_crawler, signals.engine_stopped)
        self.setting = get_project_settings()
        self.process = None
ScrapeReviewCounts.py (project: Get-Positive, author: M-shin)
def getReviewCount(url):
  # Get the number of reviews
  process = CrawlerProcess(get_project_settings())
  process.crawl(review_count_spider, start_url=url)
  process.start()
coindesk.py (project: finTech, author: keepCodingDream)
def __new__(cls, *args, **kwargs):
        start_list = ['http://www.coindesk.com/category/news/']
        i = 2
        deeps = get_project_settings()['SPIDER_DEEP']
        while i < deeps:
            start_list.append('http://www.coindesk.com/category/news/page/' + bytes(i) + "/")
            i += 1
        CoinDesk.start_urls = start_list
        print CoinDesk.start_urls
        return super(CoinDesk, cls).__new__(cls, *args, **kwargs)
dbhelper.py (project: finTech, author: keepCodingDream)
def __init__(self):
        self.settings = get_project_settings()

        self.host = self.settings['MYSQL_HOST']
        self.port = self.settings['MYSQL_PORT']
        self.user = self.settings['MYSQL_USER']
        self.passwd = self.settings['MYSQL_PASSWD']
        self.db = self.settings['MYSQL_DBNAME']
proxy.py (project: PythonScrapyBasicSetup, author: matejbasic)
def import_settings(self):
        settings = get_project_settings()
        self.password = settings['AUTH_PASSWORD']
        self.http_proxy = settings['HTTP_PROXY']
        self.control_port = settings['CONTROL_PORT']
        self.max_req_per_ip = settings['MAX_REQ_PER_IP']
class_pipeline_test.py (project: dancedeets-monorepo, author: mikelambert)
def runTest(self):
        settings = get_project_settings()
        settings.set('SPIDER_MODULES', ['classes.spiders'])
        try:
            sys.path.append(scrapy_path)
            runner = CrawlerRunner(settings)
            spiders = runner.spider_loader.list()
            self.assertEqual(set(class_pipeline.get_spiders()), set(spiders))
        except:
            pass
ignition.py (project: tobber, author: fchamicapereira)
def __init__(self):

        # getting the settings of the project (settings.py)
        self.settings = get_project_settings()

        # processing input arguments
        self.process_args()

        # meeting the arguments with the settings
        self.change_settings()

        # open mongo here just to check if mongod service is running
        # if it isn't, might as well not start crawling
        if self.args.file is None:
            self.open_mongo()
            self.dump_collection()

        # running the spiders
        self.run_crawler()

        if self.args.file:
            self.sort_file()

        else:

            if self.args.server == False:

                # working with the mongo db
                self.sort()

            # close mongo
            self.close_mongo()
aiqiyi_spider.py (project: video_url_crawler_demo, author: czs0x55aa)
def __init__(self):
        scrapy.spiders.Spider.__init__(self)

        self.global_settings = get_project_settings()
        if self.global_settings['PLATFORM'] in ['win', 'mac']:
            self.driver = webdriver.PhantomJS(executable_path= self.global_settings['PHANTOMJS_PATH'])
        elif self.global_settings['PLATFORM'] in ['linux']:
            self.driver = webdriver.PhantomJS()
        self.driver.set_page_load_timeout(30)
        self.driver.implicitly_wait(10)

        self.type_id_list = self.global_settings['CRAWLER']['type_id_list']
        self.re_type_id = re.compile(self.global_settings['CRAWLER']['re_type_id'])
        self.url_template = self.global_settings['CRAWLER']['url_template']
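
This spider drives its PhantomJS setup and URL generation from custom settings: PLATFORM, PHANTOMJS_PATH and a nested CRAWLER dict. Their values are not shown in the source; a hypothetical settings.py block of the same shape, with placeholder values, might be:

# Hypothetical settings.py block; key names come from the spider, values are placeholders.
PLATFORM = 'linux'                              # 'win', 'mac' or 'linux'
PHANTOMJS_PATH = '/usr/local/bin/phantomjs'     # only consulted on 'win'/'mac'
CRAWLER = {
    'type_id_list': [1, 2, 3],
    're_type_id': r'type_id=(\d+)',
    'url_template': 'https://example.com/list/{type_id}/{page}',
}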
asset.py (project: py-investment, author: kprestel)
def _run_spiders(ticker_list, start_date, end_date):
        configure_logging()
        runner = CrawlerRunner(settings=get_project_settings())

        spider_dict = {
            'symbols': ticker_list,
            'start_date': start_date,
            'end_date': end_date
        }
        runner.crawl(EdgarSpider, **spider_dict)
        d = runner.join()
        d.addBoth(lambda _: reactor.stop())
        reactor.run()
crawler.py (project: Pysearch2.0, author: Pysearch)
def crawl(url):
    """Initialize crawling sequence."""
    settings = get_project_settings()
    settings.url = url
    settings["CLOSESPIDER_PAGECOUNT"] = CRAWL_COUNT
    settings["DEPTH_LEVEL"] = DEPTH_LEVEL
    process = CrawlerProcess(settings)

    class ThisSpider(CrawlingSpider):
        """Create a spider to crawl with."""

        start_urls = [url]
    process.crawl(ThisSpider)
    process.start()
harvester.py (project: Pysearch2.0, author: Pysearch)
def harvest(url):
    """Initialize harvest sequence."""
    settings = get_project_settings()
    settings.url = url
    process = CrawlerProcess(settings)
    process.crawl(HarvestSpider, url=url)
    process.start()
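
A note on the two override styles used in the Pysearch snippets: settings.url = url only attaches a plain Python attribute to the Settings object (settings.get('url') would not return it), whereas item assignment and settings.set() register an actual setting. A hedged sketch of the more explicit form, with placeholder values:

# Sketch of explicit overrides before starting a crawl (placeholder values).
from scrapy.crawler import CrawlerProcess
from scrapy.utils.project import get_project_settings

settings = get_project_settings()
settings.set('CLOSESPIDER_PAGECOUNT', 20, priority='cmdline')  # stop after 20 pages
settings.set('DEPTH_LIMIT', 2, priority='cmdline')             # cap the crawl depth
process = CrawlerProcess(settings)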
sogou_weixin.py (project: sogou_weixin, author: xiaodaguan)
def __init__(self, **kwargs):

        settings = get_project_settings()

        self.create_display()

        self.load_proxy_list()

        self.get_item_seen(settings)

