get_data.py 文件源码-python代码片段

get_data.py 文件源码

python

阅读 33 收藏 0 点赞 0 评论 0

项目：X-ray-classification 作者: bendidi 项目源码文件源码

def extract(url):
    global img_no

    try :
        img_no += 1
        r = requests.get(url)
        tree = html.fromstring(r.text)

        div = tree.xpath('//table[@class="masterresultstable"]\
            //div[@class="meshtext-wrapper-left"]')
    except : div=[]

    if div != []:
        div = div[0]
    else:
        return

    typ = div.xpath('.//strong/text()')[0]
    items = div.xpath('.//li/text()')
    img = tree.xpath('//img[@id="theImage"]/@src')[0]


    final_data[img_no] = {}
    final_data[img_no]['type'] = typ
    final_data[img_no]['items'] = items
    final_data[img_no]['img'] = domain + img
    try :
        urllib.urlretrieve(domain+img, path+str(img_no)+".png")
        with open('data_new.json', 'w') as f:
            json.dump(final_data, f)

        output = "Downloading Images : {}".format(img_no)
        sys.stdout.write("\r\x1b[K" + output)
        sys.stdout.flush()
    except :return