def parse_info(self, response):
    """Parse a user's profile header and request their basic-info page.

    Extracts the ``div.tip2`` block under ``body/div.u`` from *response*,
    fills a ``WeiboWebInfoItem`` with the user ``ID`` and, when they can be
    found, ``TweetsNum``, ``FollowerNum``, ``FanNum`` and ``URL``, then yields
    a request for ``http://weibo.cn/<ID>/info`` carrying the item in
    ``meta["item"]`` with ``self.parse_basic_info`` as callback.

    Nothing is yielded when the header block or the uid is missing, because
    the follow-up URL cannot be built without an ID.

    Args:
        response: The response for the profile page being parsed.

    Yields:
        scrapy.Request: The follow-up request for the basic-info page.
    """
    # Function-scope import: the file's import block is not visible from here.
    import logging

    log = logging.getLogger(__name__)

    item = WeiboWebInfoItem()
    info_text = scrapy.Selector(response).xpath(
        "body/div[@class='u']/div[@class='tip2']"
    ).extract_first()

    # extract_first() returns None when the XPath matches nothing.
    uid_match = re.search(r'uid=(.*?)">', info_text) if info_text else None
    if uid_match is None:
        log.warning("parse_info: no uid found in %s; skipping", response.url)
        return
    item['ID'] = uid_match.group(1)

    # NOTE(review): the original patterns began with "??" -- the Chinese
    # labels were lost to mis-encoding, leaving a regex that cannot compile
    # ("nothing to repeat"), and all three fields shared one pattern. The
    # labels below are assumed to be the profile counters "weibo" (posts),
    # "guanzhu" (following) and "fensi" (fans); confirm against a live page.
    # They are written as \u escapes so the source stays encoding-proof.
    # The original trailing "</span>" anchor was dropped because it is not
    # known which element wraps each counter -- TODO confirm.
    count_labels = (
        ('TweetsNum', u"\u5fae\u535a"),
        ('FollowerNum', u"\u5173\u6ce8"),
        ('FanNum', u"\u7c89\u4e1d"),
    )
    for field, label in count_labels:
        found = re.search(label + r"\[(.*?)\]", info_text)
        if found:
            item[field] = found.group(1)
        else:
            # Best-effort: one missing counter must not block the others.
            log.debug("parse_info: %s not found for uid %s", field, item['ID'])

    # URL generation stays best-effort as in the original, but a failure is
    # now logged instead of being silently discarded.
    try:
        tweet_url, _follower_url = url_generator_for_id(item['ID'])
    except Exception:
        log.exception("parse_info: could not build URLs for uid %s", item['ID'])
    else:
        item['URL'] = tweet_url

    basic_info_url = 'http://weibo.cn/%s/info' % item['ID']
    yield scrapy.Request(
        basic_info_url,
        meta={"item": item},
        callback=self.parse_basic_info,
    )
评论列表
文章目录