IPSpider.py 文件源码

python
阅读 19 收藏 0 点赞 0 评论 0

项目:IPProxyPool 作者: jianghaibo12138 项目源码 文件源码
def run(self):
    """Run the spider scheduling loop forever.

    Each cycle:
      1. Opens a ``DBHelper`` and reads the stored ids via ``getIds()``.
      2. If fewer than ``config.MINNUM`` ids are stored, spawns one
         ``self.spider`` greenlet per entry of ``config.parser_list``,
         joining them in batches of ``config.MAX_DOWNLOAD_CONCURRENT``.
      3. Sleeps for ``config.CHECK_INTERVAL`` before checking again.

    Every spawned spider receives an ip looked up from a randomly chosen
    stored id, or an empty string when nothing is stored yet.
    NOTE(review): the ip is presumably used as a proxy for the crawl --
    confirm against ``self.spider``.

    This method never returns.
    """
    while True:
        logging.info("[+] Spider start runing")

        # Look at what is currently stored in the database.
        helper = DBHelper()
        stored_ids = helper.getIds()
        logging.info("[+] db save ip: {0}".format(len(stored_ids)))

        if len(stored_ids) >= config.MINNUM:
            # Enough entries already; nothing to crawl this round.
            logging.info("[+] now ip num meet the requirement,wait check again...'")
        else:
            logging.info("[+] now ip num < MINNUM start spider")
            batch = []
            for parser in config.parser_list:
                # A fresh random stored id is drawn for every parser.
                ip = helper.getIp(random.choice(stored_ids)) if stored_ids else ''
                batch.append(gevent.spawn(self.spider, parser, ip))

                # Cap the number of greenlets in flight: wait for the
                # current batch to finish before spawning more.
                if len(batch) >= config.MAX_DOWNLOAD_CONCURRENT:
                    gevent.joinall(batch)
                    batch = []
            # Wait for the final, possibly partial, batch.
            gevent.joinall(batch)

        logging.info("[+] sleep now")
        time.sleep(config.CHECK_INTERVAL)
评论列表
文章目录


问题


面经


文章

微信
公众号

扫码关注公众号