def extract(url):
    """Scrape one result page, record its metadata and download its image.

    The page at ``url`` is fetched with ``requests`` and parsed with
    ``html.fromstring`` (presumably ``lxml.html`` -- confirm against the
    file's imports).  The first ``div.meshtext-wrapper-left`` found inside
    ``table.masterresultstable`` supplies the record:

    * ``type``  -- first ``<strong>`` text inside that div
    * ``items`` -- every ``<li>`` text inside that div
    * ``img``   -- ``domain`` + the ``src`` of ``<img id="theImage">``

    The record is stored in the module-level ``final_data`` dict under the
    current value of the module-level counter ``img_no``.  The image is then
    saved as ``path + str(img_no) + ".png"``, the whole of ``final_data`` is
    rewritten to ``data_new.json`` and a one-line progress message is
    printed to stdout.

    This is a best-effort operation: a failed fetch/parse, a page that lacks
    any of the expected elements, or a failed download/save makes the
    function return quietly so that a surrounding crawl can continue.
    ``KeyboardInterrupt`` and ``SystemExit`` are no longer swallowed.

    Args:
        url: Address of the page to scrape.

    Returns:
        None.  Results are delivered through ``final_data``, the image file
        and ``data_new.json``.
    """
    global img_no

    # The counter advances for every attempted URL, including ones that are
    # skipped below, so image numbers follow the order of attempts.
    img_no += 1

    try:
        response = requests.get(url)
        tree = html.fromstring(response.text)
        wrappers = tree.xpath(
            '//table[@class="masterresultstable"]'
            '//div[@class="meshtext-wrapper-left"]'
        )
    except Exception:
        # Network or parse failure: skip this page.
        return
    if not wrappers:
        return
    wrapper = wrappers[0]

    headings = wrapper.xpath('.//strong/text()')
    image_sources = tree.xpath('//img[@id="theImage"]/@src')
    if not headings or not image_sources:
        # Incomplete page: skip it instead of raising IndexError on [0].
        return

    image_url = domain + image_sources[0]
    final_data[img_no] = {
        'type': headings[0],
        'items': wrapper.xpath('.//li/text()'),
        'img': image_url,
    }

    # urlretrieve lives in urllib.request on Python 3 and in urllib on
    # Python 2; resolve it here so the download works on either.
    try:
        from urllib.request import urlretrieve
    except ImportError:
        from urllib import urlretrieve

    try:
        urlretrieve(image_url, path + str(img_no) + ".png")
        # Rewrite the full dump after every image so progress survives an
        # interrupted run.
        with open('data_new.json', 'w') as f:
            json.dump(final_data, f)
        output = "Downloading Images : {}".format(img_no)
        # "\r\x1b[K" returns to column 0 and clears the line, so the
        # progress message is redrawn in place.
        sys.stdout.write("\r\x1b[K" + output)
        sys.stdout.flush()
    except Exception:
        # Download or save failed: keep going with the next page.
        return
评论列表
文章目录