# ndvi_hdf_download.py -- source file (Python code snippet)

def _html_download_is_complete(fname, tail_size=10):
    """Return True if the last ``tail_size`` bytes of ``fname`` contain ``</html>``.

    Files shorter than ``tail_size`` bytes are read in full rather than
    failing on a seek that would land before the start of the file.
    """
    # Binary mode: end-relative positioning is only reliable on byte streams.
    with open(fname, 'rb') as f:
        f.seek(0, os.SEEK_END)
        size = f.tell()
        f.seek(max(size - tail_size, 0))
        return b'</html>' in f.read(tail_size)


def _discard_partial_download(fname):
    """Best-effort removal of ``fname``; a missing file is not an error."""
    try:
        os.remove(fname)
    except OSError:
        # Deliberately ignored: this runs while another error is already
        # propagating, and that original error is the one worth reporting.
        pass


def mirror_modis_dates_html(base_url, mirror_dir, use_wget=False):
    """
    Download all MODIS date listing pages to a local directory.

    Usually, a MODIS listing for a date should not change (only new dates
    should be added), so there should be no need to re-download: pages that
    already exist in ``mirror_dir`` are skipped.

    Args:
        base_url: passed to ``collect_all_dates_pages`` to obtain the
            ``(date, url)`` pairs to mirror.
        mirror_dir: directory (created if needed) where each page is stored
            as ``<date>.html``.
        use_wget: if true, fetch with ``/usr/bin/wget`` instead of
            ``urllib.urlretrieve``.

    Returns:
        True if at least one page was downloaded, False otherwise.

    Raises:
        urllib.ContentTooShortError: a downloaded page does not end with
            ``</html>`` (probably a partial download).
        subprocess.CalledProcessError: ``wget`` exited with a non-zero status.

    Whenever a download fails or turns out to be incomplete, the file is
    removed before the error propagates, so that the next run retries it
    instead of mistaking the truncated file for a finished download.
    """
    ndownloads = 0
    dates_urls = collect_all_dates_pages(base_url)
    utils.mkdir_p(mirror_dir)
    for date, url in dates_urls:
        fname = os.path.join(mirror_dir, date + '.html')
        if os.path.exists(fname):
            continue

        # Single formatted string: same output as the old print statement,
        # and valid syntax on both Python 2 and Python 3.
        print('Downloading  %s' % fname)
        complete = False
        try:
            if use_wget:
                # Argument list, no shell: URLs containing '&', '?', ';' or
                # spaces are passed to wget verbatim instead of being
                # interpreted by /bin/sh.
                subprocess.check_call(['/usr/bin/wget', url, '-O', fname])
            else:
                urllib.urlretrieve(url, fname)
            # The MODIS MOLT repository server doesn't return Content-Length
            # so urllib cannot tell if it downloaded the whole html or was
            # just disconnected, which could lead to incomplete HTML being
            # downloaded. So we check if the downloaded file ends with </html>
            if not _html_download_is_complete(fname):
                raise urllib.ContentTooShortError(
                    "Couldn't find </html> in downloaded file, probably a partial download", ""
                )
            complete = True
        finally:
            if not complete:
                _discard_partial_download(fname)
        ndownloads += 1

        # Just avoid firing requests as fast as possible
        time.sleep(0.1)

    return ndownloads > 0