def _gallery_url_for_set(set_name):
    """Build the card-image-gallery URL for a set name.

    The name is lower-cased, the standalone word "of" is dropped and the
    remaining words are joined with hyphens, so "Hour of Devastation"
    yields ".../card-image-gallery/hour-devastation".
    """
    # split() without arguments collapses any run of whitespace, so dropping
    # "of" can never leave an empty segment (and therefore a "--") in the
    # slug. Comparing whole words also avoids stripping "of" out of the
    # middle of a longer word.
    words = [word for word in set_name.lower().split() if word != 'of']
    return ('http://magic.wizards.com/en/articles/archive/card-image-gallery/'
            + '-'.join(words))


def scrape_fullspoil(url="http://magic.wizards.com/en/articles/archive/card-image-gallery/hour-devastation", setinfo={"code": "HOU"}, showRarityColors=False, showFrameColors=False, manual_cards=[], delete_cards=[]):
    """Scrape a card image gallery page into a ``{"cards": [...]}`` dict.

    Every ``<img>`` that is a direct child of a ``<p>`` inside the element
    with id ``content-detail-page-of-an-article`` becomes one card with the
    keys ``name`` (taken from the ``alt`` attribute), ``img`` and ``url``
    (both taken from the ``src`` attribute).  The card list is then passed
    through ``download_images``, ``get_rarities_by_symbol`` and
    ``get_mana_symbols``.

    Args:
        url: Gallery page to fetch.  Ignored when ``setinfo`` has a
            ``'name'`` key, in which case the URL is derived from that name.
        setinfo: Set description.  ``setinfo['code']`` is always read;
            ``setinfo['name']`` is optional.
        showRarityColors: Not used by this function.
        showFrameColors: Not used by this function.
        manual_cards: Not used by this function.
        delete_cards: Not used by this function (no filtering is applied).

    Returns:
        Whatever ``get_mana_symbols`` returns for the scraped spoiler --
        presumably the same ``{"cards": [...]}`` dict with extra fields
        filled in (verify against that helper).

    Raises:
        KeyError: If ``setinfo`` has no ``'code'`` key, or a matched image
            lacks an ``alt`` or ``src`` attribute.
    """
    # NOTE: the mutable defaults in the signature are kept for interface
    # compatibility; none of them is mutated in this function.
    if 'name' in setinfo:
        url = _gallery_url_for_set(setinfo['name'])

    page = requests.get(url)
    tree = html.fromstring(page.content)

    cards = []
    for container in tree.xpath('//*[@id="content-detail-page-of-an-article"]'):
        # The path must be relative (leading "."): an expression starting
        # with "//" is evaluated from the document root even when called on
        # an element, which would re-add every image once per container.
        for image in container.xpath('.//p/img'):
            # Normalise the typographic apostrophe and keep only the first
            # half of split-card names ("A /// B" -> "A").
            name = image.attrib['alt'].replace(u"\u2019", "'").split(' /// ')[0]
            source = image.attrib['src']
            cards.append({"name": name, "img": source, "url": source})

    fullspoil = {"cards": cards}
    # Count from the list itself: it is always defined (even when nothing
    # matched) and always agrees with what is handed to the helpers below.
    # The single-argument call form prints the same text on Python 2 and 3.
    print("Spoil Gallery has " + str(len(cards)) + " cards.")

    download_images(fullspoil['cards'], setinfo['code'])
    fullspoil = get_rarities_by_symbol(fullspoil, setinfo['code'])
    fullspoil = get_mana_symbols(fullspoil, setinfo['code'])
    return fullspoil
评论列表
文章目录