belvedere_artists.py 文件源码

python
阅读 15 收藏 0 点赞 0 评论 0

项目:toollabs 作者: multichill 项目源码 文件源码
def getBelvedereArtistsGenerator():
    """
    Generator to return the artists listed on digital.belvedere.at

    Requests the people index page for every lowercase ASCII letter and
    yields one dict per artist entry found on it, with the keys:

    * id          - the numeric id taken from the /people/<id>/ link (string)
    * name        - the link text, with HTML entities unescaped
    * description - the text of the <div> right after the heading, unescaped
    * url         - the artist page url rebuilt from the id

    """
    htmlparser = HTMLParser.HTMLParser()

    basesearchurl = u'http://digital.belvedere.at/people/%s'
    # One artist entry looks like:
    #   <h3><a href="/people/123/some-slug">Name</a></h3><div>Description</div>
    # None of < > = / " are special in a regular expression, so they are
    # written unescaped; only \d needs its backslash. Compiled once because
    # the same pattern is applied to every page.
    artistregex = re.compile(
        u'<h3><a href="/people/(?P<id>\\d+)/[^"]+">(?P<name>[^<]+)</a></h3>'
        u'<div>(?P<description>[^<]+)</div>'
    )

    # Just loop over the pages, one per letter
    for letter in string.ascii_lowercase:
        searchurl = basesearchurl % (letter,)
        # Progress output. The parenthesised form prints the same text as a
        # Python 2 print statement and is also valid as a function call.
        print(searchurl)
        searchPage = requests.get(searchurl)

        for match in artistregex.finditer(searchPage.text):
            artistid = match.group(u'id')
            yield {
                u'id': artistid,
                u'name': htmlparser.unescape(match.group(u'name')),
                u'description': htmlparser.unescape(match.group(u'description')),
                u'url': u'http://digital.belvedere.at/people/%s/' % (artistid,),
            }
评论列表
文章目录


问题


面经


文章

微信
公众号

扫码关注公众号