mtaobao.py 文件源码

python
阅读 19 收藏 0 点赞 0 评论 0

项目:taobaoscrapy 作者: hunterhug 项目源码 文件源码
def getHtml(url, daili='', postdata=None):
    """
    Fetch ``url`` and return the raw response body as bytes.

    Cookies set by the server are kept in a Mozilla-format cookie jar stored
    in ``cookie.txt`` (relative to the current working directory): the jar is
    loaded before the request if the file exists and written back after a
    successful request.  In addition, a fixed ``Cookie`` header is sent whose
    value is read from ``../subcookie.txt`` when that file exists (the
    placeholder ``'ddd'`` is used otherwise).

    The opener built here is also installed as the process-wide default via
    ``urllib.request.install_opener``, so later plain ``urlopen`` calls made
    elsewhere reuse the same headers, cookie jar and proxy.

    Args:
        url: Address to request.
        daili: Optional HTTP proxy given as ``host:port`` (no scheme).  An
            empty string means "connect directly".
        postdata: Optional mapping (or sequence of pairs) of form fields.
            When non-empty it is URL-encoded and sent as the request body,
            which makes the request a POST; otherwise a GET is issued.

    Returns:
        The response body as ``bytes``.

    Errors raised by urllib while opening the URL propagate unchanged; in
    that case the cookie file is not rewritten.
    """
    # File used to persist the cookie jar between calls.
    filename = 'cookie.txt'

    # MozillaCookieJar reads/writes the Netscape "cookies.txt" format.
    cj = http.cookiejar.MozillaCookieJar(filename)

    # Reuse cookies from a previous run, including session cookies
    # (ignore_discard) and ones already past their expiry (ignore_expires).
    if os.path.exists(filename):
        cj.load(filename, ignore_discard=True, ignore_expires=True)

    # Extra, manually supplied Cookie header value.
    if os.path.exists('../subcookie.txt'):
        with open('../subcookie.txt', 'r') as cookie_file:
            # Strip surrounding whitespace: a trailing newline from the file
            # would make http.client reject the header as invalid.
            cookie = cookie_file.read().strip()
    else:
        cookie = 'ddd'

    cookie_handler = urllib.request.HTTPCookieProcessor(cj)
    if daili:
        print('??:' + daili + '??')
        # Only build the proxy handler when a proxy was actually requested.
        proxy_support = urllib.request.ProxyHandler({'http': 'http://' + daili})
        opener = urllib.request.build_opener(proxy_support, cookie_handler,
                                             urllib.request.HTTPHandler)
    else:
        opener = urllib.request.build_opener(cookie_handler)

    # Default headers attached to every request made through this opener.
    opener.addheaders = [('User-Agent',
                          'Mozilla/5.0 (iPad; U; CPU OS 4_3_3 like Mac OS X; en-us) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8J2 Safari/6533.18.5'),
                         ('Referer',
                          'http://s.m.taobao.com'),
                         ('Host', 'h5.m.taobao.com'),
                         ('Cookie', cookie)]

    # Keep the historical side effect: make this opener the global default.
    urllib.request.install_opener(opener)

    # A non-empty postdata turns the request into a POST with a
    # URL-encoded body; otherwise no body is sent.
    body = urllib.parse.urlencode(postdata).encode() if postdata else None

    # The context manager closes the response once the body has been read.
    with opener.open(url, body) as response:
        html_bytes = response.read()

    # Persist whatever cookies the server set for the next call.
    cj.save(ignore_discard=True, ignore_expires=True)
    return html_bytes


# ?????????? (Windows)
评论列表
文章目录


问题


面经


文章

微信
公众号

扫码关注公众号