_FOLLOWING_API_ROOT = 'https://medium.com/_/api/users/'

# Built from adjacent literals rather than a backslash continuation inside
# the string, so re-indenting this code can never leak whitespace into the
# header value.
_FOLLOWING_USER_AGENT = (
    'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) '
    'Chrome/50.0.2661.102 Safari/537.36'
)

# The response body is scanned with regexes instead of being parsed as JSON.
_FOLLOWING_USERNAME_PATTERN = '"username":"(.*?)","createdAt"'
_FOLLOWING_CURSOR_PATTERN = '"to":"(.*?)"}}},"v"'


def _fetch_following_page(url):
    """Fetch one page of the following API and scan it.

    Args:
        url: Full URL of the page to request.

    Returns:
        A ``(usernames, cursors)`` tuple: every username matched in the
        response body, and every ``to`` pagination cursor matched in it
        (an empty list when the body contains no cursor).
    """
    # A fresh cookie jar per request, so no cookies carry over between pages.
    cookie_jar = cookielib.MozillaCookieJar()
    opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cookie_jar))
    request = urllib2.Request(url)
    request.add_header("User-agent", _FOLLOWING_USER_AGENT)
    response = opener.open(request, timeout=10)
    try:
        body = response.read()
    finally:
        # Release the socket even if reading the body fails.
        response.close()
    usernames = re.findall(_FOLLOWING_USERNAME_PATTERN, body)
    cursors = re.findall(_FOLLOWING_CURSOR_PATTERN, body)
    return usernames, cursors


def get_following(user_id):
    """Return the usernames listed by the Medium "following" API for a user.

    The first page is requested without a cursor; each later page is
    requested with ``?to=<cursor>``, using the first cursor matched in the
    previous response. Paging stops when a response contains no cursor, or
    when a cursor that was already followed comes back (which would
    otherwise loop forever).

    Args:
        user_id: Medium user identifier, concatenated into the URL as-is.

    Returns:
        A list of unique usernames, in no particular order.

    Raises:
        Whatever ``opener.open`` raises on HTTP or network failure
        (e.g. ``urllib2.URLError``); errors are not caught here.
    """
    first_page_url = _FOLLOWING_API_ROOT + user_id + '/following'
    usernames, cursors = _fetch_following_page(first_page_url)
    following_set = set(usernames)

    followed_cursors = set()
    while cursors and cursors[0] not in followed_cursors:
        cursor = cursors[0]
        followed_cursors.add(cursor)
        usernames, cursors = _fetch_following_page(
            first_page_url + '?to=' + cursor)
        following_set.update(usernames)

    return list(following_set)
medium_crawler.py 文件源码
python
阅读 20
收藏 0
点赞 0
评论 0
评论列表
文章目录