crawler.py 文件源码-python代码片段

crawler.py 文件源码

python

阅读 32 收藏 0 点赞 0 评论 0

项目：tf-tutorial 作者: zchen0211 项目源码文件源码

def download(start, end):
  """Download images for a slice of the classes listed in 'parse_dict'.

  'parse_dict' is loaded with np.load and used as a mapping from a class
  key to a sequence of image links. For every class key in
  keys[start:end] whose target directory does not exist yet, the directory
  is created and up to the first 500 links are fetched into it as
  '<key>_<n>.jpg', where n counts the successful downloads so far.

  Args:
    start: first index (inclusive) into the list of class keys.
    end: last index (exclusive) into the list of class keys.
  """
  # NOTE(review): np.load on a file without an .npy/.npz header presumably
  # falls back to unpickling -- only load files from a trusted source.
  parse_dict = np.load('parse_dict')
  image = urllib.URLopener()
  # list(...) is a no-op on Python 2 and keeps the slice valid if keys()
  # returns a view instead of a list.
  for k in list(parse_dict.keys())[start:end]:
    log.info('crawling images of class %s' % k)
    data_path = os.path.join('/media/DATA/ImageNet/Extra/', k)
    if os.path.exists(data_path):
      # Directory already present: this class is skipped entirely.
      continue
    os.mkdir(data_path)
    cnt = 0  # number of images successfully saved for this class
    for link in parse_dict[k][:500]:
      fn = os.path.join(data_path, '%s_%d.jpg' % (k, cnt))
      # The retrieve must run once per link; previously it sat after the
      # loop, so at most the last link of each class was downloaded.
      try:
        image.retrieve(link, fn)
      except IOError:
        # Best effort: skip links that cannot be fetched and reuse the
        # same index for the next one, so file numbering has no gaps.
        continue
      cnt += 1
      if cnt % 20 == 0:
        log.info('%d images' % cnt)