mtgs_scraper.py 文件源码-python代码片段

mtgs_scraper.py 文件源码

python

阅读 25 收藏 0 点赞 0 评论 0

项目：Magic-Spoiler 作者: Cockatrice 项目源码文件源码

def scrape_mtgs_images(url='http://www.mtgsalvation.com/spoilers/183-hour-of-devastation', mtgscardurl='http://www.mtgsalvation.com/cards/hour-of-devastation/', exemptlist=()):
    """Collect spoiler image URLs for every card listed on a spoiler page.

    The page at ``url`` is fetched and every element whose ``class``
    attribute contains ``log-card`` is treated as one card entry. For each
    entry a card-page URL is built from ``mtgscardurl``, the element's
    ``data-card-id`` attribute and a slug derived from the element text;
    that page is fetched and the ``src`` of its first ``<img>`` whose class
    contains ``card-spoiler-image`` is recorded.

    Args:
        url: Address of the spoiler listing page.
        mtgscardurl: Prefix for individual card pages; the card id and the
            slugged card name are appended to it.
        exemptlist: Container of card names (matched against the element
            text) to skip entirely. Defaults to an empty tuple so no
            mutable object is shared between calls.

    Returns:
        dict mapping card name to ``{"url": image_src}``; ``image_src`` is
        ``''`` when the card page has no matching image or the image has
        no ``src`` attribute.
    """
    page = requests.get(url)
    tree = html.fromstring(page.content)
    cards = {}
    for child in tree.xpath('//*[contains(@class, "log-card")]'):
        name = child.text
        card_id = child.attrib.get('data-card-id')
        # An entry without text or without an id cannot be turned into a
        # card-page URL; skip it instead of aborting the whole scrape.
        if name is None or card_id is None:
            continue
        if name in exemptlist:
            continue
        # Slug the name: spaces become dashes, apostrophes and commas are
        # dropped, and the '-//' left over from split-card names is removed.
        slug = name.replace(' ', '-').replace("'", "").replace(',', '').replace('-//', '')
        childurl = mtgscardurl + card_id + '-' + slug
        cardpage = requests.get(childurl)
        # Use a separate name so the spoiler-page tree is not shadowed.
        cardpagetree = html.fromstring(cardpage.content)
        images = cardpagetree.xpath('//img[contains(@class, "card-spoiler-image")]')
        try:
            cardurl = images[0].attrib['src']
        except (IndexError, KeyError):
            # No matching <img>, or it has no src: record an empty URL.
            cardurl = ''
        cards[name] = {
            "url": cardurl
        }
        # Short pause between card-page requests.
        time.sleep(.2)
    return cards