crawler.py 文件源码-python代码片段

crawler.py 文件源码

python

阅读 32 收藏 0 点赞 0 评论 0

def __init__(self, target, dynamic=0):
        """Set up crawl bookkeeping and the headless-Chrome option set.

        Args:
            target: Stored unchanged on ``self.target`` (presumably the
                site or URL to crawl -- confirm against callers).
            dynamic: Stored unchanged on ``self.dynamic``; defaults to 0
                (presumably toggles browser-based rendering -- confirm).
        """
        self.target, self.dynamic = target, dynamic

        # Crawl state; every container starts out empty.
        # NOTE(review): url_set is a list despite its name.
        self.url_set, self.urls = [], []
        self.sitemap = []
        self.url_rule = []
        # A maxsize of 0 makes the queue unbounded.
        self.q = queue.Queue(maxsize=0)
        # Presumably the number of worker threads -- confirm where it is read.
        self.thread_num = 4
        self.header = {
            'User-Agent': 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:22.0) Gecko/20100101 Firefox/22.0',
        }

        # Chrome command-line switches, added in the order listed.
        switches = (
            "--headless",
            "--disable-gpu",
            "--window-size=1920x1080",
            "--disable-xss-auditor",
        )
        browser_options = webdriver.ChromeOptions()
        for switch in switches:
            browser_options.add_argument(switch)

        # Presumably disables image loading (2 looks like Chrome's "block"
        # content setting) -- TODO confirm the pref key is still honoured.
        browser_options.experimental_options["prefs"] = {
            "profile.default_content_settings": {"images": 2},
        }
        self.chrome_options = browser_options