scrape.py 文件源码

python
阅读 27 收藏 0 点赞 0 评论 0

项目:hearthscan-bot 作者: d-schmidt 项目源码 文件源码
def getHearthpwnIdAndUrl(name, set, type, isToken, session):
    """Find a card in hearthpwn's filtered card list; return its id and image URL.

    The card list page is requested through ``session`` using ``setUrlTempl``
    filled with the adjusted name, a premium flag, and the type/set ids looked
    up in ``hsTypeId`` and ``setNameIds``. The result rows are then scanned for
    a title that equals ``name`` case-insensitively.

    Args:
        name: Card name to look for. '-' is replaced by a space and '!' is
            removed for the search query only; the title comparison uses the
            unmodified name.
        set: Key into ``setNameIds`` (shadows the builtin; kept for callers).
        type: Key into ``hsTypeId`` (shadows the builtin; kept for callers).
        isToken: When truthy the premium filter is 0, otherwise 1.
        session: Object providing ``get(url)``; presumably a
            ``requests.Session`` -- confirm against the caller.

    Returns:
        A ``(hpid, image_url)`` tuple: the integer id captured by
        ``hpIdRegex`` from the image's ``data-href`` attribute, and the image
        ``src`` (or a fixed placeholder when ``src`` is empty) with
        ``http://`` replaced by ``https://`` and lowercased.

    Raises:
        Exception: If no row title matches ``name``, or the matching row has
            no ``data-href`` that ``hpIdRegex`` can parse.
        Whatever ``r.raise_for_status()`` raises for a failed response, and
        the lookup error from ``hsTypeId[type]`` / ``setNameIds[set]`` for an
        unknown key.
    """
    log.debug("getHearthpwnIdAndUrl() getting for %s", name)
    # hearthpwn is also weird
    hpname_hacked = name.replace('-', ' ').replace('!', '')
    premium = 0 if isToken else 1

    # filter-name={}&filter-premium={}&filter-type={}&filter-set={}
    r = session.get(setUrlTempl.format(hpname_hacked, premium, hsTypeId[type], setNameIds[set]))
    r.raise_for_status()
    html = fromstring(r.text)

    images = html.xpath('//td[@class="visual-image-cell"]/a/img')
    descs = html.xpath('//td[@class="visual-details-cell"]/h3/a')

    wanted = name.lower()
    # zip() pairs each image with its title and stops at the shorter list, so
    # a mismatch between the two XPath results cannot raise IndexError.
    for img, desc in zip(images, descs):
        title = desc.text
        # .text is None when the link has no leading text; skip such rows
        # instead of crashing on None.lower().
        if title is None or title.lower() != wanted:
            continue

        image = img.get('src')
        if not image:
            image = 'http://media-hearth.cursecdn.com/avatars/148/738/687.png'

        # /cards/31128-annoy-o-tron-fanclub
        href = img.get('data-href')
        found = hpIdRegex.match(href) if href else None
        if found is None:
            # Fail with a message naming the card rather than an opaque
            # AttributeError/TypeError from the unchecked match.
            raise Exception("getHearthpwnIdAndUrl() card " + name
                            + " has no parsable id at hearthpwn")
        return int(found.group(1)), image.replace('http://', 'https://').lower()

    log.debug("getHearthpwnIdAndUrl() card not found at hearthpwn '%s' '%s'", set, name)
    raise Exception("getHearthpwnIdAndUrl() card " + name + " not found at hearthpwn")
评论列表
文章目录


问题


面经


文章

微信
公众号

扫码关注公众号