FengChao.py 文件源码

python
阅读 17 收藏 0 点赞 0 评论 0

项目:hzlgithub 作者: hzlRises 项目源码 文件源码
def getKeyword(i):
    """Fetch the keyword-recommendation JSON for ``keyword_list[i]``.

    Sleeps for one second, then POSTs a ``jupiter/GET/kr/word`` request to
    fengchao.baidu.com using pycurl and passes the raw response body to
    ``analyseJsonData(i, body)``.

    Relies on names defined outside this function: ``keyword_list``,
    ``COOKIE``, ``TOKEN``, ``USERID``, ``getUA`` and ``analyseJsonData``.

    Any exception raised along the way is printed and swallowed (best-effort
    behaviour); the function always returns ``None``. The curl handle is
    closed on every path so repeated calls do not leak handles/connections.
    """
    curl = None
    try:
        # Fixed one-second pause before every request.
        time.sleep(1)

        headers = [
            'Host:fengchao.baidu.com',
            'User-Agent: %s' % getUA(),
            'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
            'Accept-Language: zh-CN,zh;q=0.8,en-US;q=0.5,en;q=0.3',
            'Accept-Encoding: gzip, deflate',
            'Referer: http://fengchao.baidu.com/nirvana/main.html?userid=8048066',
            'Connection: keep-alive',
            'COOKIE:%s' % COOKIE,
        ]

        # NOTE(review): the keyword is interpolated into the JSON text
        # without escaping, so a keyword containing '"' or '\\' yields an
        # invalid payload -- confirm keywords are sanitised by the caller.
        post = urllib.urlencode({
            'params': '{"entry":"kr_station","query":"%s","querytype":1,"pageNo":1,"pageSize":300}' % keyword_list[i],
            'path': 'jupiter/GET/kr/word',
            'token': TOKEN,
            'userid': USERID,
        })
        url = 'http://fengchao.baidu.com/nirvana/request.ajax?path=jupiter/GET/kr/word'

        # Local buffer that receives the (decoded) response body.
        response = StringIO.StringIO()

        curl = pycurl.Curl()
        # Optional proxy support, currently disabled:
        # curl.setopt(pycurl.PROXY, getRandomAlbIp())
        curl.setopt(pycurl.URL, url)
        curl.setopt(pycurl.FOLLOWLOCATION, True)
        curl.setopt(pycurl.MAXREDIRS, 5)
        curl.setopt(pycurl.CONNECTTIMEOUT, 20)
        curl.setopt(pycurl.TIMEOUT, 20)
        curl.setopt(pycurl.ENCODING, 'gzip,deflate')
        curl.setopt(pycurl.HTTPHEADER, headers)
        curl.setopt(pycurl.POST, 1)
        curl.setopt(pycurl.POSTFIELDS, post)
        curl.setopt(pycurl.WRITEFUNCTION, response.write)
        curl.perform()

        # A mutex around the analysis step was present but commented out in
        # the original (mutex.acquire() / mutex.release()).
        jsonData = response.getvalue()
        analyseJsonData(i, jsonData)
    except Exception as e:
        # Best-effort: report the failure and carry on with other keywords.
        print(e)
    finally:
        # Always release the curl handle, even when the request failed.
        if curl is not None:
            curl.close()
评论列表
文章目录


问题


面经


文章

微信
公众号

扫码关注公众号