medium_crawler.py 文件源码

python
阅读 20 收藏 0 点赞 0 评论 0

项目:Medium-crawler-with-data-analyzer 作者: lifei96 项目源码 文件源码
# Single-line User-Agent value. The previous literal used a backslash
# continuation *inside* the string, which embedded the next line's indentation
# (a long run of spaces) in the header and made the two requests differ.
_FOLLOWERS_USER_AGENT = (
    'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) '
    'Chrome/50.0.2661.102 Safari/537.36'
)


def _fetch_followers_page(url):
    """Fetch one followers page and return ``(usernames, cursors)``.

    ``usernames`` is every ``"username"`` value that is immediately followed
    by a ``"createdAt"`` key in the response text; ``cursors`` is every
    ``"to"`` paging value found (empty when the response has none).
    """
    # A fresh cookie jar per request: no cookies are carried between pages.
    cookie_jar = cookielib.MozillaCookieJar()
    opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cookie_jar))
    req = urllib2.Request(url)
    req.add_header("User-agent", _FOLLOWERS_USER_AGENT)
    response = opener.open(req, timeout=10)
    try:
        data = response.read()
    finally:
        # Always release the connection, even if reading fails.
        response.close()
    usernames = re.findall('"username":"(.*?)","createdAt"', data)
    cursors = re.findall('"to":"(.*?)"}}},"v"', data)
    return usernames, cursors


def get_followers(user_id):
    """Return the usernames of all followers of a Medium user.

    Requests ``https://medium.com/_/api/users/<user_id>/followers`` and keeps
    following the ``to`` paging cursor found in each response until a response
    contains no cursor.

    Args:
        user_id: Medium user id string; it is concatenated into the URL as-is.

    Returns:
        A list of unique follower usernames, in no particular order.

    Errors raised while opening or reading a page are not caught here and
    propagate to the caller.
    """
    base_url = 'https://medium.com/_/api/users/' + user_id + '/followers'
    usernames, cursors = _fetch_followers_page(base_url)
    followers_set = set(usernames)
    seen_cursors = set()
    while cursors:
        cursor = cursors[0]
        if cursor in seen_cursors:
            # The server handed back a cursor we already followed; stop
            # rather than requesting the same page forever.
            break
        seen_cursors.add(cursor)
        usernames, cursors = _fetch_followers_page(base_url + '?to=' + cursor)
        followers_set.update(usernames)
    return list(followers_set)
评论列表
文章目录


问题


面经


文章

微信
公众号

扫码关注公众号