def parse(self, response):
    """Extract the counters from a page and request the matching info page.

    Looks up the ``div.tip2`` block under ``body/div[@class="u"]``, pulls
    the bracketed numbers that follow three fixed labels out of it, and
    stores each number that is present on a fresh ``InfoItem``. It then
    requests ``http://weibo.cn/<ID>/info`` and hands the partially filled
    item to ``self.parse1`` through the request's ``meta``.

    Args:
        response: Response whose ``meta["ID"]`` carries the identifier
            used to build the info-page URL (presumably a user id --
            confirm against the caller).

    Yields:
        Request: the follow-up request for the info page, with
        ``meta={"item": <InfoItem>, "ID": <ID>}``.

    Raises:
        KeyError: if ``response.meta`` has no ``"ID"`` entry.
    """
    selector = Selector(response)
    meta_id = response.meta["ID"]
    text0 = selector.xpath('body/div[@class="u"]/div[@class="tip2"]').extract_first()
    info = InfoItem()
    if text0:
        # Item field -> pattern. The \uXXXX parts are string escapes for the
        # Chinese labels; the backslashes meant for the regex engine are
        # doubled so the literals contain no invalid string escapes
        # (``\[`` / ``\d`` in a non-raw literal warn on modern Python).
        counters = (
            ("num_tweets", u'\u5fae\u535a\\[(\\d+)\\]'),   # label "weibo" (posts)
            ("num_follows", u'\u5173\u6ce8\\[(\\d+)\\]'),  # label "following"
            ("num_fans", u'\u7c89\u4e1d\\[(\\d+)\\]'),     # label "fans"
        )
        for field, pattern in counters:
            found = re.search(pattern, text0)
            # A counter that is absent from the page is simply left unset.
            if found:
                info[field] = int(found.group(1))
        # NOTE(review): the original indentation was lost; the follow-up
        # request is assumed to belong inside this branch -- confirm.
        url_information1 = "http://weibo.cn/%s/info" % meta_id
        # dont_filter=True bypasses Scrapy's duplicate-request filter.
        yield Request(
            url=url_information1,
            meta={"item": info, "ID": meta_id},
            dont_filter=True,
            callback=self.parse1,
        )
评论列表
文章目录