Python pycurl.REFERER usage examples
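All of the snippets below set the Referer request header through pycurl's REFERER option. A minimal, self-contained sketch of the pattern (Python 3; the function name and URLs are illustrative placeholders, not taken from any snippet):

import pycurl
from io import BytesIO

def fetch_with_referer(url, referer):
    buf = BytesIO()
    c = pycurl.Curl()
    c.setopt(pycurl.URL, url)
    c.setopt(pycurl.REFERER, referer)      # value sent as the Referer header
    c.setopt(pycurl.FOLLOWLOCATION, True)  # follow 3xx redirects
    c.setopt(pycurl.WRITEDATA, buf)        # collect the body in memory
    try:
        c.perform()
        return buf.getvalue()
    finally:
        c.close()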
import time
import StringIO
import pycurl

def Curl(url, headers):
    """Fetch a URL with pycurl, retrying until a usable page comes back."""
    while True:
        try:
            c = pycurl.Curl()
            c.setopt(pycurl.REFERER, 'http://weixin.sogou.com/')
            c.setopt(pycurl.FOLLOWLOCATION, True)
            c.setopt(pycurl.MAXREDIRS, 5)
            c.setopt(pycurl.CONNECTTIMEOUT, 60)
            c.setopt(pycurl.TIMEOUT, 120)
            c.setopt(pycurl.ENCODING, 'gzip,deflate')
            c.fp = StringIO.StringIO()
            c.setopt(pycurl.URL, url)
            c.setopt(pycurl.HTTPHEADER, headers)
            c.setopt(c.WRITEFUNCTION, c.fp.write)
            c.perform()
            html = c.fp.getvalue()
            if '??????' in html:  # anti-crawler marker; the original Chinese text was lost to encoding
                print 'Blocked by the anti-crawler check, sleeping 10 minutes'
                time.sleep(600)
            else:
                return html
        except Exception as e:
            print url, 'curl(url)', e
            continue
# Generic pycurl fetch helper: retries until the request succeeds; extra
# keyword arguments are passed through as libcurl options.
import StringIO
import pycurl

def curl(url, debug=False, **kwargs):
    while True:
        try:
            s = StringIO.StringIO()
            c = pycurl.Curl()
            c.setopt(pycurl.URL, url)
            c.setopt(pycurl.REFERER, url)
            c.setopt(pycurl.FOLLOWLOCATION, True)
            c.setopt(pycurl.TIMEOUT, 60)
            c.setopt(pycurl.ENCODING, 'gzip')
            c.setopt(pycurl.USERAGENT, 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/31.0.1650.63 Safari/537.36')
            c.setopt(pycurl.NOSIGNAL, True)
            c.setopt(pycurl.WRITEFUNCTION, s.write)
            for k, v in kwargs.iteritems():
                c.setopt(vars(pycurl)[k], v)  # look up the option constant by name
            c.perform()
            c.close()
            return s.getvalue()
        except Exception:
            if debug:
                raise
            continue
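The vars(pycurl)[k] lookup lets callers pass any libcurl option by its constant name. A hypothetical call (the URL and option values are illustrative):

html = curl('http://example.com/',
            CONNECTTIMEOUT=10,   # resolved to pycurl.CONNECTTIMEOUT
            COOKIEFILE='')       # empty string enables the in-memory cookie engine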
def signin(self, user, password, data):
self.req.http.c.setopt(
pycurl.REFERER,
"https://1fichier.com/login.pl?lg=en")
try:
html = self.load("https://1fichier.com/login.pl?lg=en",
post={'mail': user,
'pass': password,
'It': "on",
'purge': "off",
'valider': "Send"})
if any(_x in html for _x in
('>Invalid username or Password', '>Invalid email address', '>Invalid password')):
self.fail_login()
    except BadHeader as e:
if e.code == 403:
self.fail_login()
else:
raise
import urllib.parse
from io import BytesIO
import pycurl

def ccurl(url, value):
    hdr = "Mozilla/5.0 (X11; Ubuntu; Linux i686; rv:45.0) Gecko/20100101 Firefox/45.0"
    c = pycurl.Curl()
    if value == "no_redir":
        print("no redirect")
    else:
        c.setopt(c.FOLLOWLOCATION, True)
    c.setopt(c.USERAGENT, hdr)
    if value != "" and value != "no_redir":
        post_data = {'id': value}
        post_d = urllib.parse.urlencode(post_data)
        c.setopt(c.POSTFIELDS, post_d)  # setting POSTFIELDS turns the request into a POST
    #if rfr != "":
    #    c.setopt(pycurl.REFERER, rfr)
    url = str(url)
    c.setopt(c.URL, url)
    storage = BytesIO()
    c.setopt(c.WRITEDATA, storage)
    c.perform()
    c.close()
    content = storage.getvalue()
    content = getContentUnicode(content)  # project-local helper: decode bytes to str
    return content
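Because setting POSTFIELDS switches libcurl to POST, a non-empty value turns the call into a form post. Hypothetical calls (URL and id are placeholders):

page = ccurl('http://example.com/stream', 'abc123')  # POSTs id=abc123
page = ccurl('http://example.com/page', '')          # plain GET, redirects followed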
import pycurl
from StringIO import StringIO

def get_html(url, user_agent, refer_url):
    """
    Fetch a page with pycurl.
    :param url: target URL
    :param user_agent: User-Agent header value
    :param refer_url: Referer header value
    :return: response body
    """
    curl = pycurl.Curl()
    curl.setopt(pycurl.USERAGENT, user_agent)
    curl.setopt(pycurl.REFERER, refer_url)
    buffers = StringIO()
    curl.setopt(pycurl.URL, url)
    curl.setopt(pycurl.WRITEDATA, buffers)
    curl.perform()
    body = buffers.getvalue()
    buffers.close()
    curl.close()
    return body
import pycurl
from StringIO import StringIO

def get(url, user_agent=UA, referrer=None):  # UA: module-level default User-Agent, defined elsewhere
    """Make a GET request of the url using pycurl and return the data
    (which is None if unsuccessful)"""
    data = None
    databuffer = StringIO()
    curl = pycurl.Curl()
    curl.setopt(pycurl.URL, url)
    curl.setopt(pycurl.FOLLOWLOCATION, 1)
    curl.setopt(pycurl.CONNECTTIMEOUT, 5)
    curl.setopt(pycurl.TIMEOUT, 8)
    curl.setopt(pycurl.WRITEFUNCTION, databuffer.write)
    curl.setopt(pycurl.COOKIEFILE, '')  # empty string enables the in-memory cookie engine
    if user_agent:
        curl.setopt(pycurl.USERAGENT, user_agent)
    if referrer is not None:
        curl.setopt(pycurl.REFERER, referrer)
    try:
        curl.perform()
        data = databuffer.getvalue()
    except Exception:
        pass
    curl.close()
    return data
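A hypothetical call (the URLs are placeholders); the referrer argument is sent as the Referer header, and a None result signals failure:

page = get('http://example.com/inner', referrer='http://example.com/')
if page is None:
    print('request failed')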
import json
import urllib
import pycurl
from StringIO import StringIO

# const, decode_sign and parse_sign2 are project-local helpers.
def get_download_link(fs_id):
    """
    Get the direct download link (dlink) of a file on Baidu Pan.
    :param fs_id: file id on the Pan server
    :return: download URL, or None on error
    """
curl = pycurl.Curl()
curl.setopt(pycurl.USERAGENT, const.USER_AGENT)
curl.setopt(pycurl.REFERER, const.PAN_REFER_URL)
buffers = StringIO()
request_dict = {
'channel': 'chunlei',
'timestamp': '1473685224',
'fidlist': [fs_id],
'type': 'dlink',
'web': 1,
'clienttype': 0,
'bdstoken': 'e0e895bb3ef7b0cb70899ee66b74e809',
'sign': decode_sign(parse_sign2('d76e889b6aafd3087ac3bd56f4d4053a', '3545d271c5d07ba27355d39da0c62a4ee06d2d25'))
}
target_url = const.PAN_API_URL + 'download?' + urllib.urlencode(request_dict)
curl.setopt(pycurl.URL, target_url)
curl.setopt(pycurl.WRITEDATA, buffers)
curl.setopt(pycurl.COOKIEFILE, "cookie.txt")
curl.perform()
body = buffers.getvalue()
buffers.close()
curl.close()
data = json.loads(body)
if data['errno']:
return None
return data['dlink'][0]['dlink']
def setRequestContext(self, url, get, post, referer, cookies, multipart=False):
    """ sets everything needed for the request """
    url = myquote(url)
    if get:
        get = urlencode(get)
        url = "%s?%s" % (url, get)  # append the query string
    self.c.setopt(pycurl.URL, url)
    self.c.lastUrl = url
    if post:
        self.c.setopt(pycurl.POST, 1)
        if not multipart:
            if type(post) == unicode:
                post = str(post)  # unicode not allowed
            elif type(post) == str:
                pass
            else:
                post = myurlencode(post)
            self.c.setopt(pycurl.POSTFIELDS, post)
        else:
            post = [(x, y.encode('utf8') if type(y) == unicode else y) for x, y in post.iteritems()]
            self.c.setopt(pycurl.HTTPPOST, post)
    else:
        self.c.setopt(pycurl.POST, 0)
    if referer and self.lastURL:
        self.c.setopt(pycurl.REFERER, str(self.lastURL))
    if cookies:
        self.c.setopt(pycurl.COOKIEFILE, "")  # empty string enables libcurl's in-memory cookie engine
        self.c.setopt(pycurl.COOKIEJAR, "")
        self.getCookies()
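The referer branch above reuses the previous request's URL, so navigation looks browser-like. A stripped-down sketch of that chaining idea (illustrative names, not pyLoad's API):

import pycurl
from io import BytesIO

class MiniClient(object):
    def __init__(self):
        self.last_url = None
        self.c = pycurl.Curl()

    def load(self, url):
        buf = BytesIO()
        self.c.setopt(pycurl.URL, url)
        if self.last_url:
            self.c.setopt(pycurl.REFERER, self.last_url)  # cite the previous page
        self.c.setopt(pycurl.WRITEDATA, buf)
        self.c.perform()
        self.last_url = url  # the next request will refer to this one
        return buf.getvalue()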
def curl_get(self, url, refUrl=None):
    buf = cStringIO.StringIO()
    curl = pycurl.Curl()
    curl.setopt(curl.URL, url)
    curl.setopt(curl.WRITEFUNCTION, buf.write)
    curl.setopt(pycurl.SSL_VERIFYPEER, 0)  # skip TLS certificate verification (insecure)
    #curl.setopt(pycurl.SSL_VERIFYHOST, 0)
    #curl.setopt(pycurl.HEADERFUNCTION, self.headerCookie)
    curl.setopt(pycurl.VERBOSE, 0)
    curl.setopt(pycurl.USERAGENT, 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.11; rv:46.0) Gecko/20100101 Firefox/46.0')
    #curl.setopt(pycurl.HTTPGET,1)
    #curl.setopt(pycurl.COOKIE, Cookie)
    #curl.setopt(pycurl.POSTFIELDS, 'j_username={ngnms_user}&j_password={ngnms_password}'.format(**self.ngnms_login))
    curl.setopt(pycurl.COOKIEJAR, '/htdocs/logs/py_cookie.txt')   # persist cookies across calls
    curl.setopt(pycurl.COOKIEFILE, '/htdocs/logs/py_cookie.txt')
    if refUrl:
        curl.setopt(pycurl.REFERER, refUrl)
    #curl.setopt(c.CONNECTTIMEOUT, 5)
    #curl.setopt(c.TIMEOUT, 8)
    curl.perform()
    backinfo = ''
    if curl.getinfo(pycurl.RESPONSE_CODE) == 200:
        backinfo = buf.getvalue()
    curl.close()
    return backinfo
def handle_request(self):
curl_handle = pycurl.Curl()
# set default options.
curl_handle.setopt(pycurl.URL, self.request_url)
curl_handle.setopt(pycurl.REFERER, self.request_url)
curl_handle.setopt(pycurl.USERAGENT, self.useragent)
curl_handle.setopt(pycurl.TIMEOUT, self.curlopts['TIMEOUT'])
curl_handle.setopt(pycurl.CONNECTTIMEOUT, self.curlopts['CONNECTTIMEOUT'])
curl_handle.setopt(pycurl.HEADER, True)
#curl_handle.setopt(pycurl.VERBOSE, 1)
curl_handle.setopt(pycurl.FOLLOWLOCATION, 1)
curl_handle.setopt(pycurl.MAXREDIRS, 5)
    if self.request_headers:
        tmplist = list()
        for (key, value) in self.request_headers.items():
            tmplist.append(key + ':' + value)
        curl_handle.setopt(pycurl.HTTPHEADER, tmplist)
    # tunnel through the proxy and send the request body as a POST
    curl_handle.setopt(pycurl.HTTPPROXYTUNNEL, 1)
    curl_handle.setopt(pycurl.POSTFIELDS, self.request_body)
response = StringIO.StringIO()
curl_handle.setopt(pycurl.WRITEFUNCTION, response.write)
try:
curl_handle.perform()
except pycurl.error as error:
raise ChannelException(error, 5)
self.response_code = curl_handle.getinfo(curl_handle.HTTP_CODE)
header_size = curl_handle.getinfo(curl_handle.HEADER_SIZE)
resp_str = response.getvalue()
self.response_headers = resp_str[0 : header_size]
self.response_body = resp_str[header_size : ]
response.close()
curl_handle.close()
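handle_request splits the response headers from the body using HEADER_SIZE. The same trick in isolation (Python 3; example.com is a placeholder):

import pycurl
from io import BytesIO

buf = BytesIO()
c = pycurl.Curl()
c.setopt(pycurl.URL, 'http://example.com/')
c.setopt(pycurl.HEADER, True)         # prepend the status line and headers to the output
c.setopt(pycurl.WRITEDATA, buf)
c.perform()
size = c.getinfo(pycurl.HEADER_SIZE)  # byte length of the header block
raw = buf.getvalue()
headers, body = raw[:size], raw[size:]
c.close()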
import urllib.parse
from io import BytesIO
import pycurl

def ccurlPost(url, value):
    hdr = "Mozilla/5.0 (X11; Ubuntu; Linux i686; rv:45.0) Gecko/20100101 Firefox/45.0"
    c = pycurl.Curl()
    if value == "no_redir":
        print("no redirect")
    else:
        c.setopt(c.FOLLOWLOCATION, True)
    c.setopt(c.USERAGENT, hdr)
    if value != "" and value != "no_redir":
        post_data = {'id': value}
        post_d = urllib.parse.urlencode(post_data)
        c.setopt(c.POSTFIELDS, post_d)  # setting POSTFIELDS turns the request into a POST
    #if rfr != "":
    #    c.setopt(pycurl.REFERER, rfr)
    url = str(url)
    c.setopt(c.URL, url)
    storage = BytesIO()
    c.setopt(c.WRITEDATA, storage)
    c.perform()
    c.close()
    content = storage.getvalue()
    content = getContentUnicode(content)  # project-local helper: decode bytes to str
    return content
import json
import urllib
import pycurl
from StringIO import StringIO

# const is a project-local module; get_download_link is defined above.
def list_dir(dir_name):
    """
    List the files under a Baidu Pan directory and collect their download links.
    :param dir_name: directory path on the Pan server
    :return: list of download links
    """
result = list()
curl = pycurl.Curl()
curl.setopt(pycurl.USERAGENT, const.USER_AGENT)
curl.setopt(pycurl.REFERER, const.PAN_REFER_URL)
buffers = StringIO()
request_dict = {
'channel': 'chunlei',
'clienttype': 0,
'showempty': 0,
'web': 1,
'order': 'time',
'desc': 1,
'page': 1,
'num': 100,
'dir': dir_name,
'bdstoken': 'e0e895bb3ef7b0cb70899ee66b74e809'
}
target_url = const.PAN_API_URL + 'list?' + urllib.urlencode(request_dict)
curl.setopt(pycurl.URL, target_url)
curl.setopt(pycurl.WRITEDATA, buffers)
curl.setopt(pycurl.COOKIEFILE, "cookie.txt")
curl.perform()
body = buffers.getvalue()
print body
buffers.close()
curl.close()
data = json.loads(body)
if data['errno'] == 0:
for a_list in data['list']:
dlink = get_download_link(a_list['fs_id'])
if dlink:
dlink = dlink.replace('\\', '')
result.append(dlink)
return result
import json
import pycurl
from StringIO import StringIO

# const, get_record_start_cursor, set_record_start_cursor, decode_url and
# save_to_file are project-local helpers.
def get_dlinks(search_target, get_dlinks_only=True):
    """
    Fetch the download links of every image returned for a search keyword.
    :param search_target: search keyword
    :param get_dlinks_only: if True, only collect the links; otherwise also save them to a file
    :return: list of image download links
    """
refer_url = const.REFER_URL % search_target
curl = pycurl.Curl()
curl.setopt(pycurl.USERAGENT, const.USER_AGENT)
curl.setopt(pycurl.REFERER, refer_url)
result = []
ll = 0
record_start_cursor = get_record_start_cursor(const.CURSOR_FILE)
if record_start_cursor:
ll = int(record_start_cursor)
print('start')
    # page through the results until the API returns no more data
    while True:
        print('crawling pictures of page %d' % (ll / 30 + 1))
        # in-memory buffer for this page's response
buffers = StringIO()
target_url = const.API_URL % (search_target, search_target, ll)
curl.setopt(pycurl.URL, target_url)
curl.setopt(pycurl.WRITEDATA, buffers)
curl.perform()
body = buffers.getvalue()
        data = json.loads(body)  # parse the JSON response; null maps to None (replaces the old replace()/eval() hack)
if 'data' in data:
has_data = False
for a_data in data['data']:
obj_url = None
if 'objURL' in a_data:
obj_url = a_data['objURL']
if obj_url:
has_data = True
result.append(obj_url)
if not has_data:
print('no more pic')
break
ll += 30
else:
print('no more pic')
break
print('done')
curl.close()
    # persist the cursor so the next run resumes from this page
    if ll:
        set_record_start_cursor(str(ll), const.CURSOR_FILE)
for index, data in enumerate(result):
result[index] = decode_url(data)
    if not get_dlinks_only:
        save_to_file(result, search_target + '.txt', const.BASE_FOLDER)
    return result