medium_crawler.py 文件源码-python代码片段

medium_crawler.py 文件源码

python

阅读 30 收藏 0 点赞 0 评论 0

项目：Medium-crawler-with-data-analyzer 作者: lifei96 项目源码文件源码

def get_twitter_profile(username, twitter_id):
    """Download a Twitter profile page and save its embedded JSON to disk.

    Requests ``https://twitter.com/<twitter_id>?lang=en`` with a browser-like
    User-agent, takes the first ``class="json-data" value="..."`` attribute
    found in the HTML, decodes it as JSON, drops a fixed set of top-level keys
    and writes the remainder (indented by 4 spaces) to
    ``./Twitter/<username>_t.json``.

    Args:
        username: Name used only to build the output file name.
        twitter_id: Twitter handle/id appended to the profile URL.

    Returns:
        None. The result is the file written to disk.

    Raises:
        IndexError: If the page contains no ``json-data`` element.
        Errors raised by ``urllib2``, ``json.loads`` or the file write are
        not caught here and propagate to the caller.
    """
    url = "https://twitter.com/" + str(twitter_id) + "?lang=en"
    cj = cookielib.MozillaCookieJar()
    opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
    req = urllib2.Request(url)
    # NOTE(review): the backslash continuation is inside the string literal,
    # so the next line's leading spaces are part of the header value. Kept
    # byte-for-byte to avoid changing what is sent on the wire.
    req.add_header("User-agent", 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) \
                    Chrome/50.0.2661.102 Safari/537.36')
    response = opener.open(req, timeout=10)
    try:
        data = response.read()
    finally:
        # Release the connection even if reading the body fails.
        response.close()

    # Only the first json-data element is used, so stop at the first match.
    match = re.search('class="json-data" value="(.*?)">', data)
    if match is None:
        # Same exception type the original indexing raised, with a usable message.
        raise IndexError("no json-data element found on %s" % url)

    # The JSON is stored HTML-escaped inside the attribute value.
    # NOTE(review): only &quot; is unescaped; other entities are left as-is.
    profile = json.loads(match.group(1).replace('&quot;', '"'))

    # Drop page-level keys that are not wanted in the saved profile.
    for key in ("promptbirdData", "wtfOptions", "typeaheadData", "dm",
                "initialState", "activeHashflags", "keyboardShortcuts",
                "deciders"):
        profile.pop(key, None)

    # The with-block closes the file even if serialisation or writing fails.
    with codecs.open("./Twitter/%s_t.json" % username, 'w', 'utf-8') as out:
        out.write(json.dumps(profile, indent=4))