def sample(month, days, **kwargs):
    """Open all files for *month*/*days* across every year in YEARS as one dataset.

    For each year, globs ``PATTERN`` and keeps only the files whose parsed
    name (via ``split_fname``) has a matching month and a day contained in
    *days*.  The surviving files are merged with ``xr.open_mfdataset`` and
    sampling metadata is recorded on the dataset's ``attrs``.  Extra keyword
    arguments are accepted but unused.
    """
    print('Sample - Month: {} Days: {}'.format(month, days))
    selected = []
    for year in YEARS:
        matches = glob.glob(PATTERN.format(year, month))
        parsed = [split_fname(fname) for fname in matches]
        # parsed[i][1] is the month, parsed[i][2] the day of matches[i]
        for idx, parts in enumerate(parsed):
            if parts[1] == month and parts[2] in days:
                selected.append(matches[idx])
    print('Sample {} files'.format(len(selected)))
    x = xr.open_mfdataset(selected, chunks=CHUNKS or None)
    x.attrs['sample_kwargs'] = {'month': month, 'days': days}
    x.attrs['band_order'] = [TEMP_BAND]
    x.attrs['old_dims'] = [getattr(x, TEMP_BAND).dims[1:]]
    x.attrs['old_coords'] = {k: v for k, v in x.coords.items()
                             if k in ('lon', 'lat',)}
    return normalize_in_time(x)
# Example source code for open_mfdataset()
def return_xarray_mfdataset(filename, chunks=None, **kwargs):
    """Open one or several netCDF files as a single xarray dataset.

    Parameters
    ----------
    filename : str
        Path to a netCDF file, possibly containing wildcards
        (e.g. ``file_*.nc``), from which to build the dataset.
    chunks : dict-like, optional
        Dictionary of chunk sizes used when creating the
        ``xarray.Dataset``.
    **kwargs
        Forwarded verbatim to :func:`xarray.open_mfdataset`.

    Returns
    -------
    ds : xarray.Dataset
    """
    return xr.open_mfdataset(filename, chunks=chunks, **kwargs)
def month_means(month):
    """Return the mean over time of all files matching MONTHLY_PATTERN for *month*."""
    matching = glob.glob(MONTHLY_PATTERN.format(month))
    merged = xr.open_mfdataset(matching, chunks=CHUNKS or None)
    return merged.mean(dim='time')
def __init__(self):
    """Eagerly open the iTRACE forcing datasets found under $iTRACE_DATA."""
    # Root directory of all iTRACE data; raises KeyError if the env var is unset.
    self.DATA_PATH = os.environ['iTRACE_DATA']
    base = self.DATA_PATH
    self.solin_jja = xr.open_mfdataset(
        os.path.join(base, 'forcing/*.SOLIN.*.JJA.nc'))
    self.solin_djf = xr.open_mfdataset(
        os.path.join(base, 'forcing/*.SOLIN.*.DJF.nc'))
    # decode_times=False — presumably the GHG file's time axis is not
    # CF-decodable; confirm against the data.
    self.ghgs = xr.open_mfdataset(
        os.path.join(base, 'forcing/iTRACE_ghgs.nc'), decode_times=False)
# ITRACE: data path for iTRACE
def open_data(self, **kwargs):
    """Open the dataset(s) located by ``get_path``, each sorted along time.

    When ``self.iTRACE_flag`` is truthy, returns the four iTRACE experiment
    datasets as the tuple ``(ice, ico, igo, igom)``.  Otherwise returns a
    single dataset: multi-file when more than one path was found, single-file
    when exactly one was.  *kwargs* are forwarded to the xarray open call.
    """
    paths = self.get_path()
    if self.iTRACE_flag:
        # Open in the same order as the original (ico, ice, igo, igom) ...
        opened = {key: xr.open_mfdataset(paths[key], **kwargs).sortby('time')
                  for key in ('ico', 'ice', 'igo', 'igom')}
        # ... but return in (ice, ico, igo, igom) order.
        return opened['ice'], opened['ico'], opened['igo'], opened['igom']
    if len(paths) > 1:
        return xr.open_mfdataset(paths, **kwargs).sortby('time')
    return xr.open_dataset(paths[0], **kwargs).sortby('time')
def open_mfdataset(paths, decode_cf=True, decode_times=True,
                   decode_coords=True, engine=None, gridfile=None,
                   t_format=None, **kwargs):
    """
    Open multiple files as a single dataset.
    This function is essentially the same as the :func:`xarray.open_mfdataset`
    function but (as the :func:`open_dataset`) supports additional decoding
    and the ``'gdal'`` engine.
    You can further specify the `t_format` parameter to get the time
    information from the files and use the results to concatenate the files
    Parameters
    ----------
    %(xarray.open_mfdataset.parameters.no_engine)s
    %(open_dataset.parameters.engine)s
    %(get_tdata.parameters.t_format)s
    %(CFDecoder.decode_coords.parameters.gridfile)s
    Returns
    -------
    xarray.Dataset
        The dataset that contains the variables from `filename_or_obj`"""
    # Expand the paths ourselves only when we must inspect the individual
    # files (time parsing or the gdal engine).
    if t_format is not None or engine == 'gdal':
        if isinstance(paths, six.string_types):
            paths = sorted(glob(paths))
        if not paths:
            raise IOError('no files to open')
        if t_format is not None:
            # Derive the time coordinate from the file names and use it as
            # the concatenation dimension.
            time, paths = get_tdata(t_format, paths)
            kwargs['concat_dim'] = time
        if engine == 'gdal':
            from psyplot.gdal_store import GdalStore
            # Wrap each path in a GdalStore and let xarray use it directly.
            paths = list(map(GdalStore, paths))
            engine = None
            kwargs['lock'] = False
    # Coordinates are decoded afterwards by CFDecoder, never by xarray.
    dset = xr.open_mfdataset(
        paths, decode_cf=decode_cf, decode_times=decode_times, engine=engine,
        decode_coords=False, **kwargs)
    if decode_cf:
        return CFDecoder.decode_ds(dset, gridfile=gridfile, inplace=True,
                                   decode_coords=decode_coords,
                                   decode_times=decode_times)
    return dset
def load_data(file, varname, extent=None, period=None, **kwargs):
    """
    Load netCDF data and extract *varname* over a spatial extent and time
    period of interest.

    Parameters
    ----------
    file : str or list
        Path to a netCDF file (may contain wildcards) or a list of paths.
    varname : str
        Name of the variable to extract.
    extent : tuple, optional
        Spatial bounds as (north, east, south, west).
    period : tuple, optional
        Pair of date tuples passed to ``datetime`` (e.g.
        ``((y1, m1, d1), (y2, m2, d2))``); both endpoints are inclusive.
    **kwargs
        Extra selections on additional dimensions, forwarded to ``ds.sel``
        (e.g. ``plev=...``).

    Returns
    -------
    arr : numpy.ndarray
        Variable values; precipitation (``kg m-2 s-1``) is converted to
        mm/day and temperature (``K``) to degrees Celsius.
    lat, lon : numpy.ndarray
        Coordinate arrays after the spatial subset.
    dates
        Decoded time values after the temporal subset.
    """
    # Open either a single- or multi-file dataset depending on wildcard/list.
    if "*" in file or isinstance(file, list):
        ds = xr.open_mfdataset(file, decode_times=False)
    else:
        ds = xr.open_dataset(file, decode_times=False)
    # Construct condition based on spatial extents
    if extent:
        n, e, s, w = extent
        ds = ds.sel(lat=(ds.lat >= s) & (ds.lat <= n))
        # Account for extent crossing Greenwich
        if w > e:
            ds = ds.sel(lon=(ds.lon >= w) | (ds.lon <= e))
        else:
            ds = ds.sel(lon=(ds.lon >= w) & (ds.lon <= e))
    # Construct condition based on time period; since decode_times=False the
    # time axis holds raw numbers, so bounds are encoded with date2num.
    if period:
        t1 = date2num(datetime(*period[0]), ds.time.units, ds.time.calendar)
        t2 = date2num(datetime(*period[1]), ds.time.units, ds.time.calendar)
        ds = ds.sel(time=(ds.time >= t1) & (ds.time <= t2))
    # Extra keyword arguments to select from additional dimensions (e.g. plev)
    if kwargs:
        ds = ds.sel(**kwargs)
    # Load in the data to a numpy array
    dates = num2date(ds.time, ds.time.units, ds.time.calendar)
    arr = ds[varname].values
    lat = ds.lat.values
    lon = ds.lon.values
    # Convert pr units from kg m-2 s-1 to mm/day
    if ds[varname].units == 'kg m-2 s-1':
        arr *= 86400
    # Convert tas units from kelvin to degrees Celsius (the original comment
    # said "degK", but subtracting 273.15 yields degC)
    elif ds[varname].units == 'K':
        arr -= 273.15
    return arr, lat, lon, dates