proxy_scrapper.py 文件源码-python代码片段

proxy_scrapper.py 文件源码

python

阅读 20 收藏 0 点赞 0 评论 0

def download_webpage(target_url, proxy=None, timeout=5):
    s = requests.Session()

    retries = Retry(total=3,
                    backoff_factor=0.5,
                    status_forcelist=[500, 502, 503, 504])

    s.mount('http://', HTTPAdapter(max_retries=retries))

    headers = {
        'User-Agent': ('Mozilla/5.0 (Windows NT 6.1; WOW64; rv:54.0) ' +
                       'Gecko/20100101 Firefox/54.0'),
        'Referer': 'http://google.com'
    }

    r = s.get(target_url,
              proxies={'http': proxy, 'https': proxy},
              timeout=timeout,
              headers=headers)

    if r.status_code == 200:
        return r.content

    return None


# Sockslist.net uses javascript to obfuscate proxies port number.
# Builds a dictionary with decoded values for each variable.
# Dictionary = {'var': intValue, ...})