def handle_starttag(self, tag, attrs):
    """Update the parser's nesting flags and capture a link target.

    Flags are only ever set here, never cleared:

    * ``h3_flag`` is set on an ``<h3>`` whose attribute list is exactly
      ``[('class', 'r')]``.
    * ``a_flag`` is set on an ``<a>`` seen while ``h3_flag`` is set.
    * ``b_flag`` is set on a ``<b>`` seen while ``a_flag`` is set.

    While ``a_flag`` is set, every ``href`` attribute of the current tag is
    inspected. A value starting with ``/url?`` (presumably a search-engine
    redirect link -- confirm against the pages being parsed) has its ``url``
    or ``q`` query parameter percent-decoded and stored in ``self.link``.
    Any other value is stored in ``self.link`` unchanged.

    NOTE(review): the indentation of this method was lost in the source it
    was recovered from. The ``else`` branch has been attached to the
    ``/url?`` prefix check, which is the reading that keeps direct links.
    Confirm against the original file.

    :param tag: lower-cased tag name supplied by the parser.
    :param attrs: list of ``(name, value)`` pairs for the tag; ``value`` may
        be ``None`` for an attribute written without a value.
    """
    if tag == 'h3' and attrs == [('class', 'r')]:
        self.h3_flag = True

    if tag == 'a' and self.h3_flag:
        self.a_flag = True

    if tag == 'b' and self.a_flag:
        self.b_flag = True

    if not self.a_flag:
        return

    for key, value in attrs:
        # A valueless attribute (``<a href>``) arrives as None; there is
        # nothing to record, and calling ``startswith`` on it would raise.
        if key != 'href' or value is None:
            continue

        if not value.startswith("/url?"):
            self.link = value
            continue

        # Raw string so that ``\?`` is a regex escape rather than an invalid
        # string escape. ``(?:&|$)`` also accepts a target that is the last
        # query parameter; when an ``&`` follows, the captured text is the
        # same as with the original ``(.+?)&`` pattern.
        m = match(r'/url\?(url|q)=(.+?)(?:&|$)', value)
        if m:
            # ``unquote`` lives in different modules on Python 2 and 3.
            try:
                from urllib.parse import unquote
            except ImportError:
                from urllib2 import unquote
            self.link = unquote(m.group(2))
评论列表
文章目录