def parse3(self, response):
    """Collect user IDs from a listing page and follow its pagination.

    The ``href`` of every link found in the page's table cells is searched
    for a ``uid=<digits>`` parameter.  Each ID found that is not in
    ``self.finish_ID`` is appended to ``self.scrawl_ID``.

    NOTE(review): presumably ``finish_ID`` holds IDs that were already
    crawled and ``scrawl_ID`` is the pending queue -- confirm against the
    rest of the spider.

    Args:
        response: The downloaded page; it is wrapped in a ``Selector`` so
            it can be queried with XPath.

    Yields:
        Request: A request for the "next page" link of the page list,
        handled by this same callback, when such a link is present.
    """
    selector = Selector(response)
    hrefs = selector.xpath('body//table/tr/td/a/@href').extract()
    for href in hrefs:
        # Raw string: in a plain literal '\d' is an invalid escape sequence.
        match = re.search(r'uid=(\d+)', href)
        if match is None:
            continue  # link carries no uid parameter
        uid = int(match.group(1))
        if uid not in self.finish_ID:
            self.scrawl_ID.append(uid)

    # The anchor text \u4e0b\u9875 is Chinese for "next page".
    url_next = selector.xpath(
        u'body//div[@class="pa" and @id="pagelist"]/form/div/a[text()="\u4e0b\u9875"]/@href').extract()
    if url_next:
        # The extracted href is appended to the site root as-is.
        yield Request(url="http://weibo.cn%s" % url_next[0], callback=self.parse3)
评论列表
文章目录