def get_twitter_profile(username, twitter_id):
    """Download a Twitter profile page and save its embedded JSON to disk.

    The page at ``https://twitter.com/<twitter_id>?lang=en`` is fetched with
    a browser-like User-agent header. The HTML-escaped JSON stored in the
    ``class="json-data"`` element is decoded, a fixed set of top-level keys
    is dropped, and the remainder is written, pretty-printed, to
    ``./Twitter/<username>_t.json``.

    Args:
        username: Used only to build the output file name.
        twitter_id: Value appended to the profile URL (converted with str()).

    Raises:
        IndexError: If the page contains no ``json-data`` element.
        ValueError: If the embedded value is not valid JSON.

    Errors raised by the HTTP request or by opening/writing the output file
    propagate to the caller unchanged.
    """
    url = "https://twitter.com/" + str(twitter_id) + "?lang=en"
    cj = cookielib.MozillaCookieJar()
    opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
    req = urllib2.Request(url)
    req.add_header(
        "User-agent",
        'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 '
        '(KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36')

    response = opener.open(req, timeout=10)
    try:
        data = response.read()
    finally:
        # Release the connection even if reading the body fails.
        response.close()

    profile_data = re.findall('class="json-data" value="(.*?)">', data)
    if not profile_data:
        # Same exception type the bare [0] index would raise, but with a
        # message that says what was actually missing.
        raise IndexError(
            "no json-data element found in profile page %s" % url)

    # The JSON sits inside an HTML attribute, so its double quotes arrive as
    # the entity &quot; and must be restored before parsing.
    # NOTE(review): other entities (e.g. &amp;) are left as-is, matching the
    # original behaviour -- confirm whether full unescaping is wanted.
    profile = json.loads(profile_data[0].replace('&quot;', '"'))

    # Drop the top-level sections that are not kept in the saved profile.
    for key in ("promptbirdData", "wtfOptions", "typeaheadData", "dm",
                "initialState", "activeHashflags", "keyboardShortcuts",
                "deciders"):
        profile.pop(key, None)

    # The context manager closes the file even if serialisation fails.
    with codecs.open("./Twitter/%s_t.json" % username, 'w', 'utf-8') as out:
        out.write(json.dumps(profile, indent=4))
medium_crawler.py 文件源码
python
阅读 21
收藏 0
点赞 0
评论 0
评论列表
文章目录