medium_crawler.py 文件源码

python
阅读 21 收藏 0 点赞 0 评论 0

项目:Medium-crawler-with-data-analyzer 作者: lifei96 项目源码 文件源码
def get_twitter_profile(username, twitter_id):
    """Download a Twitter profile page and save its embedded JSON to disk.

    The page at ``https://twitter.com/<twitter_id>?lang=en`` is fetched with
    a browser-like User-agent, the value of the first ``class="json-data"``
    element is extracted and parsed as JSON, a fixed set of top-level keys is
    dropped, and the remainder is written (indented, UTF-8) to
    ``./Twitter/<username>_t.json``.

    Args:
        username: Only used to build the output file name.
        twitter_id: Twitter handle/id appended to the profile URL; converted
            with ``str()``.

    Returns:
        None. The result is the file written to disk.

    Raises:
        urllib2.URLError: If the request fails or times out (10 s timeout).
        IndexError: If the page contains no ``json-data`` element.
        ValueError: If the extracted value is not valid JSON.
        IOError: If the output file cannot be opened; the ``./Twitter``
            directory is not created here.
    """
    url = "https://twitter.com/" + str(twitter_id) + "?lang=en"
    cj = cookielib.MozillaCookieJar()
    opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
    req = urllib2.Request(url)
    # NOTE: the backslash continuation is inside the string literal, so the
    # next line's leading whitespace is part of the header value that is sent.
    req.add_header("User-agent", 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) \
                    Chrome/50.0.2661.102 Safari/537.36')
    response = opener.open(req, timeout=10)
    try:
        data = response.read()
    finally:
        # Release the connection even if reading the body fails.
        response.close()

    profile_data = re.findall('class="json-data" value="(.*?)">', data)
    # The JSON sits inside a double-quoted HTML attribute, so its own quotes
    # arrive entity-encoded and must be restored before parsing. Only this
    # one entity is decoded; any others are left as they are.
    profile = json.loads(profile_data[0].replace('&quot;', '"'))

    # Drop top-level sections that are not wanted in the saved profile.
    unwanted_keys = (
        "promptbirdData",
        "wtfOptions",
        "typeaheadData",
        "dm",
        "initialState",
        "activeHashflags",
        "keyboardShortcuts",
        "deciders",
    )
    for key in unwanted_keys:
        profile.pop(key, None)

    # The context manager closes the file even if serialisation/writing fails.
    with codecs.open("./Twitter/%s_t.json" % username, 'w', 'utf-8') as out:
        out.write(json.dumps(profile, indent=4))
评论列表
文章目录


问题


面经


文章

微信
公众号

扫码关注公众号