hupuScrapy.py 文件源码

python
阅读 16 收藏 0 点赞 0 评论 0

项目:BaymaxHome 作者: tyhtao1990 项目源码 文件源码
def parse(self, response):
        """Scrape one game page and store the result through ``mongodb_client``.

        Reads the rows of the ``J_away_content`` and ``J_home_content``
        tables, and for every row that contains a player link collects the
        link text followed by the plain cell texts. The game time, team
        names, score and page URL are then wrapped in a ``game`` object whose
        ``__dict__`` is printed and passed to ``insert_one`` under the
        ``"games"`` name of the ``"hupu_data"`` database.

        Nothing is built or stored when the away table yields no rows.

        :param response: response object exposing ``xpath()`` and ``url``.
        :return: ``None``; the method yields no items or requests.
        """
        mongoClient = mongodb_client('localhost', 27017)
        print("************************")
        player_away = response.xpath('//table[@id="J_away_content"]/tbody/tr')
        player_home = response.xpath('//table[@id="J_home_content"]/tbody/tr')

        # NOTE(review): the home rows are only processed when away rows
        # exist; this mirrors the original control flow -- confirm intended.
        if not player_away:
            return

        players = []
        for rows in (player_away, player_home):
            for player in rows:
                playerName = player.xpath('td/a/text()').extract()
                # Rows without a player link (presumably header or summary
                # rows) are skipped.
                if playerName:
                    players.append(
                        playerName + player.xpath('td/text()').extract())
        print("************************")

        game_time = response.xpath(
            '//div[@class="about_fonts clearfix"]/p[@class="time_f"]/text()'
        ).extract()
        team = response.xpath('//div[@class="message"]/p/a/text()').extract()
        score = response.xpath('//div[@class="message"]/h2/text()').extract()
        url = response.url

        g = game(game_time, team, score, players, url)
        print(g.__dict__)

        client = mongoClient.connect()
        db = mongoClient.useDB(client, "hupu_data")
        # The return value of insert_one is printed as-is for debugging.
        print(mongoClient.insert_one(db, "games", g.__dict__))
评论列表
文章目录


问题


面经


文章

微信
公众号

扫码关注公众号