def parse3(self, response):
    """Pick the next user ID to crawl from the links on this page.

    Collects the ``href`` of every ``body//table/tr/td/a`` anchor in
    ``response``, extracts the numeric ``uid=<digits>`` value from each one,
    and replaces the last entry of ``self.next_ID`` with a candidate chosen
    at random. The first candidate found is stored in ``self.temp`` and used
    as the fallback target.

    Args:
        response: The downloaded page to scan for ``uid=`` links.

    Yields:
        One ``Request`` for ``http://weibo.cn/u/<id>`` whose callback is
        ``self.parse``. Nothing is yielded, and ``self.next_ID`` /
        ``self.temp`` are left untouched, when the page has no ``uid=`` link.
    """
    hrefs = Selector(response).xpath('body//table/tr/td/a/@href').extract()

    next_urls = []
    for href in hrefs:
        match = re.search(r'uid=(\d+)', href)
        if match:
            next_urls.append(int(match.group(1)))

    if not next_urls:
        # Without candidates, random.choice() would raise IndexError *after*
        # self.next_ID had already been popped; bail out before mutating state.
        import logging

        logging.getLogger(__name__).warning(
            "parse3: no 'uid=' links found in %s; no follow-up request issued",
            getattr(response, "url", response),
        )
        return

    # Swap the most recent ID for a randomly chosen candidate.
    self.next_ID.pop()
    self.next_ID.append(random.choice(next_urls))
    self.temp = next_urls[0]

    try:
        next_url = "http://weibo.cn/u/%s" % self.next_ID[-1]
        yield Request(url=next_url, dont_filter=True, callback=self.parse)
    except Exception:
        # Deliberately not a bare ``except``: GeneratorExit (raised at the
        # yield above when the generator is closed) must propagate, because
        # yielding again after it raises RuntimeError.
        self.next_ID.pop()
        self.next_ID.append(self.temp)
        next_url = "http://weibo.cn/u/%s" % self.temp
        yield Request(url=next_url, dont_filter=True, callback=self.parse)
评论列表
文章目录