def getBelvedereArtistsGenerator():
    """
    Generator to return the artists listed on digital.belvedere.at

    Walks the people index pages http://digital.belvedere.at/people/a
    through http://digital.belvedere.at/people/z and scrapes every artist
    entry found in the returned HTML.

    Yields one dict per entry with the keys:
    * u'id': the digits taken from the /people/<id>/... link, as text
    * u'name': the link text, with HTML entities unescaped
    * u'description': the text of the <div> right after the heading,
      with HTML entities unescaped
    * u'url': http://digital.belvedere.at/people/<id>/

    Nothing is fetched until the generator is iterated. Exceptions raised
    by requests.get are not caught here and propagate to the caller.
    """
    htmlparser = HTMLParser.HTMLParser()
    basesearchurl = u'http://digital.belvedere.at/people/%s'
    # Compiled once up front instead of handing the pattern string to
    # re.finditer again for every page. The pattern text itself is unchanged.
    artistregex = re.compile(u'\<h3\>\<a href\=\"\/people\/(?P<id>\d+)\/[^\"]+\"\>(?P<name>[^\<]+)\<\/a\>\<\/h3\>\<div\>(?P<description>[^\<]+)\<\/div\>')
    # Just loop over the index pages, one per lowercase ASCII letter
    for letter in string.ascii_lowercase:
        searchurl = basesearchurl % (letter,)
        # Progress output. Parenthesised so the line parses both as a print
        # statement and as a print() call; the printed text is the same.
        print(searchurl)
        searchPage = requests.get(searchurl)
        for match in artistregex.finditer(searchPage.text):
            artistid = match.group(u'id')
            yield {
                u'id': artistid,
                u'name': htmlparser.unescape(match.group(u'name')),
                u'description': htmlparser.unescape(match.group(u'description')),
                u'url': u'http://digital.belvedere.at/people/%s/' % (artistid,),
            }
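# (Web-page residue from where this snippet was copied, not part of the program: "Comment list" / "Table of contents".)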