SpiderMain.py 文件源码-python代码片段

SpiderMain.py 文件源码

python

阅读 18 收藏 0 点赞 0 评论 0

项目：WeiboWebSpider 作者: Apocally 项目源码文件源码

def parse_info(self, response):
        """Parse a Weibo profile page and request the user's basic-info page.

        Reads the ``div.tip2`` summary block of ``response``, fills a
        ``WeiboWebInfoItem`` with the user ID, the three counters and the
        tweet-list URL, then yields a request for
        ``http://weibo.cn/<ID>/info`` that carries the item in ``meta`` and
        is handled by ``self.parse_basic_info``.

        Every field is extracted independently, so a single missing field
        does not discard the ones after it. When the summary block or the
        user ID cannot be found nothing is yielded, because the follow-up
        URL cannot be built without the ID.
        """
        import logging

        log = logging.getLogger(__name__)

        selector = scrapy.Selector(response)
        item = WeiboWebInfoItem()
        info = selector.xpath("body/div[@class='u']/div[@class='tip2']")
        info_text = info.extract_first()
        if info_text is None:
            log.warning("No profile summary block found on %s", response.url)
            return

        uid_match = re.search(r'uid=(.*?)">', info_text)
        if uid_match is None:
            log.warning("No user ID found on %s", response.url)
            return
        user_id = uid_match.group(1)
        item['ID'] = user_id

        # NOTE(review): in the recovered source each label was the two
        # characters "??", which is not a valid regex ("nothing to repeat"),
        # so none of the counters could ever be extracted. The labels below
        # are restored from the field names (posts / following / fans) and
        # written as escapes so they survive re-encoding -- confirm them,
        # and the shared "</span>" suffix, against the live page markup.
        counter_labels = (
            ('TweetsNum', u"\u5fae\u535a"),
            ('FollowerNum', u"\u5173\u6ce8"),
            ('FanNum', u"\u7c89\u4e1d"),
        )
        for field, label in counter_labels:
            counter_match = re.search(label + r"\[(.*?)\]</span>", info_text)
            if counter_match is None:
                log.warning("Counter %s not found on %s", field, response.url)
                continue
            item[field] = counter_match.group(1)

        # The URL is best-effort, as in the original: a failure here must not
        # prevent the basic-info request, but it is logged instead of hidden.
        try:
            tweet_url, _follower_url = url_generator_for_id(user_id)
        except Exception:
            log.exception("Could not build URLs for user %s", user_id)
        else:
            item['URL'] = tweet_url

        basic_info_url = 'http://weibo.cn/%s/info' % user_id
        yield scrapy.Request(basic_info_url, meta={"item": item}, callback=self.parse_basic_info)