def load_dictionary(filename):
    dataset = xr.open_dataset(filename, engine='scipy')
    return dict(dataset.data_vars)
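A quick way to sanity-check this helper is to round-trip a small dataset. The file name and variable below are made up for illustration, and note that the 'scipy' engine only handles classic NetCDF3 files:

import numpy as np
import xarray as xr

# Write a tiny NetCDF3 file (the 'scipy' engine cannot read NetCDF4/HDF5).
xr.Dataset({"t2m": ("time", np.arange(3.0))}).to_netcdf("example.nc", engine="scipy")

variables = load_dictionary("example.nc")   # hypothetical file name
print(list(variables))                      # ['t2m'] -- each value is a DataArray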
def load(self):
    """
    Load the state from the restart file.

    Returns
    -------
    state : dict
        The model state stored in the restart file.
    """
    dataset = xr.open_dataset(self._filename)
    state = {}
    for name, value in dataset.data_vars.items():
        state[name] = DataArray(value[0, :])  # remove time axis
    state['time'] = datetime64_to_datetime(dataset['time'][0])
    return state
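The snippet above calls a datetime64_to_datetime helper that is not shown here; a plausible stand-in (an assumption, not the project's actual implementation) is:

import pandas as pd

def datetime64_to_datetime(value):
    # Accept a numpy datetime64 scalar or a 0-d DataArray wrapping one,
    # and return a plain datetime.datetime object.
    value = getattr(value, "values", value)
    return pd.Timestamp(value).to_pydatetime()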
def open_dataset(file_path):
    ds = xr.open_dataset(file_path, mask_and_scale=False, drop_variables='crs')
    ds.attrs['crs'] = datacube.utils.geometry.CRS('EPSG:32754')
    return ds
def __init__(self, path):
    self.path = path
    if isinstance(path, string_types):
        self.ds = xr.open_dataset(path)
def load(path: Path) -> DataSet:
    """
    Loads a data set from the specified NetCDF4 file.

    Parameters
    ----------
    path: pathlib.Path
        Path to the file which should be loaded.

    Returns
    -------
    DataSet
        The data set loaded from the specified file.
    """
    log = logging.getLogger(__name__)
    log.info("loading data set from %s", path)

    data = xr.open_dataset(str(path))  # type: xr.Dataset

    # restore data types
    data[_DataVar.FILENAME] = data[_DataVar.FILENAME].astype(np.object).fillna(None)
    data[_DataVar.CHUNK_NR] = data[_DataVar.CHUNK_NR].astype(np.object).fillna(None)
    data[_DataVar.CV_FOLDS] = data[_DataVar.CV_FOLDS].astype(np.object).fillna(None)
    data[_DataVar.PARTITION] = data[_DataVar.PARTITION].astype(np.object).fillna(None)
    data[_DataVar.LABEL_NOMINAL] = data[_DataVar.LABEL_NOMINAL].astype(np.object).fillna(None)
    data[_DataVar.LABEL_NUMERIC] = data[_DataVar.LABEL_NUMERIC].astype(np.object)
    data[_DataVar.FEATURES] = data[_DataVar.FEATURES].astype(np.float32)

    return DataSet(data=data,
                   mutable=False)
def load_netcdf_array(datafile, meta, layer_specs=None):
    '''
    Loads metadata for NetCDF

    Parameters:
        :datafile: str: Path on disk to NetCDF file
        :meta: dict: netcdf metadata object
        :layer_specs: dict<str:str>, list<str>: variables to load

    Returns:
        :new_es: xr.Dataset
    '''
    logger.debug('load_netcdf_array: {}'.format(datafile))
    ds = xr.open_dataset(datafile)
    if layer_specs:
        data = []
        if isinstance(layer_specs, dict):
            data = {k: ds[getattr(v, 'name', v)] for k, v in layer_specs.items()}
            layer_spec = tuple(layer_specs.values())[0]
        if isinstance(layer_specs, (list, tuple)):
            data = {getattr(v, 'name', v): ds[getattr(v, 'name', v)]
                    for v in layer_specs}
            layer_spec = layer_specs[0]
        data = OrderedDict(data)
    else:
        data = OrderedDict([(v, ds[v]) for v in meta['variables']])
        layer_spec = None
    geo_transform = take_geo_transform_from_meta(layer_spec=layer_spec,
                                                 required=True,
                                                 **meta)
    for b, sub_dataset_name in zip(meta['layer_meta'], data):
        b['geo_transform'] = meta['geo_transform'] = geo_transform
        b['sub_dataset_name'] = sub_dataset_name
    new_es = xr.Dataset(data,
                        coords=_normalize_coords(ds),
                        attrs=meta)
    return new_es
def __call__(self, filename=None, varname=None):
    if self.array_type == 'numpy':
        out = Dataset(filename).variables[varname][:].squeeze()
    elif self.array_type == 'xarray':
        ds = xr.open_dataset(filename, chunks=self.chunks, lock=False)
        out = ds[varname]
    elif self.array_type == 'dask_from_numpy':
        d = Dataset(filename).variables[varname][:].squeeze()
        out = da.from_array(np.array(d), chunks=self.chunks)
    elif self.array_type == 'dask_from_netcdf':
        d = Dataset(filename).variables[varname]
        out = da.from_array(d, chunks=self.chunks)
    return out
def return_xarray_dataset(filename, chunks=None, **kwargs):
    """Return an xarray dataset corresponding to filename.

    Parameters
    ----------
    filename : str
        path to the netcdf file from which to create a xarray dataset
    chunks : dict-like
        dictionary of chunk sizes used to create the xarray.Dataset

    Returns
    -------
    ds : xarray.Dataset
    """
    return xr.open_dataset(filename, chunks=chunks, **kwargs)
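A minimal usage sketch for the wrapper above; the demo file is created on the fly, the chunk sizes are arbitrary, and dask must be installed for chunked (lazy) reads:

import numpy as np
import xarray as xr

# Create a small demo file so the call below can actually run.
xr.Dataset({"sst": (("time", "lat"), np.random.rand(4, 8))}).to_netcdf("demo.nc")

ds = return_xarray_dataset("demo.nc", chunks={"time": 2})
print(ds["sst"].chunks)   # ((2, 2), (8,)) -- data is dask-backed and loaded lazily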
def open_data(self, **kwargs):
    data = self.get_path()
    if self.iTRACE_flag:
        ico = xr.open_mfdataset(data['ico'], **kwargs).sortby('time')
        ice = xr.open_mfdataset(data['ice'], **kwargs).sortby('time')
        igo = xr.open_mfdataset(data['igo'], **kwargs).sortby('time')
        igom = xr.open_mfdataset(data['igom'], **kwargs).sortby('time')
        return ice, ico, igo, igom
    else:
        if len(data) > 1:
            return xr.open_mfdataset(data, **kwargs).sortby('time')
        else:
            return xr.open_dataset(data[0], **kwargs).sortby('time')
def __init__(self, dataset_path):
    self.dataset_path = dataset_path
    try:
        self.dataset = xr.open_dataset(self.dataset_path)
    except OSError:
        print('File not found.')
        exit()
def read(self, file_path):
    self.file_path = file_path
    self.dataset = xr.open_dataset(self.file_path)
    self.var_names = self.get_var_names(self.dataset)
def get_xarray(self):
    self.dataset = xr.open_dataset(self.dataset_path)
    return self.dataset
def open_mfdataset(paths, decode_cf=True, decode_times=True,
                   decode_coords=True, engine=None, gridfile=None,
                   t_format=None, **kwargs):
    """
    Open multiple files as a single dataset.

    This function is essentially the same as the :func:`xarray.open_mfdataset`
    function but (as the :func:`open_dataset`) supports additional decoding
    and the ``'gdal'`` engine.
    You can further specify the `t_format` parameter to get the time
    information from the files and use the results to concatenate the files.

    Parameters
    ----------
    %(xarray.open_mfdataset.parameters.no_engine)s
    %(open_dataset.parameters.engine)s
    %(get_tdata.parameters.t_format)s
    %(CFDecoder.decode_coords.parameters.gridfile)s

    Returns
    -------
    xarray.Dataset
        The dataset that contains the variables from `filename_or_obj`"""
    if t_format is not None or engine == 'gdal':
        if isinstance(paths, six.string_types):
            paths = sorted(glob(paths))
        if not paths:
            raise IOError('no files to open')
    if t_format is not None:
        time, paths = get_tdata(t_format, paths)
        kwargs['concat_dim'] = time
    if engine == 'gdal':
        from psyplot.gdal_store import GdalStore
        paths = list(map(GdalStore, paths))
        engine = None
        kwargs['lock'] = False
    ds = xr.open_mfdataset(
        paths, decode_cf=decode_cf, decode_times=decode_times, engine=engine,
        decode_coords=False, **kwargs)
    if decode_cf:
        return CFDecoder.decode_ds(ds, gridfile=gridfile, inplace=True,
                                   decode_coords=decode_coords,
                                   decode_times=decode_times)
    return ds
def get_nldas_fora_X_and_vic_y(year, month, day, hour,
                               vic_or_fora, band_order=None,
                               prefix=None, data_arrs=None,
                               keep_columns=None):
    '''Load data from VIC for NLDAS Forcing A Grib files

    Parameters:
        year: year of forecast time
        month: month of forecast time
        day: day of forecast time
        hour: hour of forecast time
        vic_or_fora: string indicating which NLDAS data source
        band_order: list of DataArray names already loaded
        prefix: add a prefix to the DataArray name from Grib
        data_arrs: Add the DataArrays to an existing dict
        keep_columns: Retain only the DataArrays in this list, if given

    Returns:
        tuple of (data_arrs, band_order) where data_arrs is
        an OrderedDict of DataArrays and band_order is their
        order when they are flattened from rasters to a single
        2-D matrix
    '''
    data_arrs = data_arrs or OrderedDict()
    band_order = band_order or []
    path = get_file(year, month, day, hour, dset=vic_or_fora)
    dset = xr.open_dataset(path, engine='pynio')
    for k in dset.data_vars:
        if keep_columns and k not in keep_columns:
            continue
        arr = getattr(dset, k)
        if sorted(arr.dims) != ['lat_110', 'lon_110']:
            continue
        #print('Model: ', f, 'Param:', k, 'Detail:', arr.long_name)
        lon, lat = arr.lon_110, arr.lat_110
        geo_transform = [lon.Lo1, lon.Di, 0.0,
                         lat.La1, 0.0, lat.Dj]
        shp = arr.shape
        canvas = Canvas(geo_transform, shp[1], shp[0], arr.dims)
        arr.attrs['canvas'] = canvas
        if prefix:
            band_name = '{}_{}'.format(prefix, k)
        else:
            band_name = k
        data_arrs[band_name] = arr
        band_order.append(band_name)
    return data_arrs, band_order
def get_filelist(pattern, date_range=None, timevar='time', calendar=None):
    '''Given a glob pattern, return a list of files between date_range.'''
    files = glob.glob(pattern)
    if date_range is not None:
        date_range = pd.to_datetime(list(date_range)).values
        sublist = []
        for f in files:
            try:
                kwargs = dict(mask_and_scale=False, concat_characters=False,
                              decode_coords=False)
                if calendar:
                    ds = xr.open_dataset(f, decode_cf=False,
                                         decode_times=False, **kwargs)
                    if (('XTIME' in ds) and
                            ('calendar' not in ds['XTIME'].attrs)):
                        ds['XTIME'].attrs['calendar'] = calendar
                    elif 'calendar' not in ds[timevar].attrs:
                        ds[timevar].attrs['calendar'] = calendar
                    # else decode using the calendar attribute in the file
                    ds = xr.decode_cf(ds, decode_times=True, **kwargs)
                else:
                    ds = xr.open_dataset(f, decode_cf=True, decode_times=True,
                                         **kwargs)
            except Exception as e:
                warnings.warn('failed to open {}: {}'.format(f, e))
                continue  # skip files that cannot be opened
            try:
                ds[timevar] = ds['XTIME']
            except KeyError:
                pass
            if CHECK_TIMEVARS:
                try:
                    check_times(ds[timevar].values, f=f)
                except ValueError as e:
                    warnings.warn(
                        'time check raised an error for file %s: %s' % (f, e))
            start = ds[timevar].values[0]
            end = ds[timevar].values[-1]
            ds.close()
            if (((start >= date_range[0]) and (start <= date_range[1])) or
                    ((end >= date_range[0]) and (end <= date_range[1])) or
                    ((start <= date_range[0]) and (end >= date_range[1]))):
                sublist.append(f)
        files = sublist
    files.sort()
    return files
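A hedged usage sketch for the function above. The WRF-style file pattern, dates, and calendar are placeholders, and the module-level glob/pandas imports and the CHECK_TIMEVARS flag are assumed to exist as in the original module:

files = get_filelist("wrfout_d01_*.nc",                  # hypothetical pattern
                     date_range=("2000-01-01", "2000-12-31"),
                     timevar="XTIME",
                     calendar="noleap")
print(len(files), "files overlap the requested period")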
def load_data(file, varname, extent=None, period=None, **kwargs):
    """
    Loads netCDF files and extracts data given a spatial extent and time period
    of interest.
    """
    # Open either a single- or multi-file dataset depending on whether a list
    # or wildcard pattern was given
    if "*" in file or isinstance(file, list):
        ds = xr.open_mfdataset(file, decode_times=False)
    else:
        ds = xr.open_dataset(file, decode_times=False)

    # Construct condition based on spatial extents
    if extent:
        n, e, s, w = extent
        ds = ds.sel(lat=(ds.lat >= s) & (ds.lat <= n))
        # Account for extent crossing Greenwich
        if w > e:
            ds = ds.sel(lon=(ds.lon >= w) | (ds.lon <= e))
        else:
            ds = ds.sel(lon=(ds.lon >= w) & (ds.lon <= e))

    # Construct condition based on time period
    if period:
        t1 = date2num(datetime(*period[0]), ds.time.units, ds.time.calendar)
        t2 = date2num(datetime(*period[1]), ds.time.units, ds.time.calendar)
        ds = ds.sel(time=(ds.time >= t1) & (ds.time <= t2))

    # Extra keyword arguments to select from additional dimensions (e.g. plev)
    if kwargs:
        ds = ds.sel(**kwargs)

    # Load the data into numpy arrays
    dates = num2date(ds.time, ds.time.units, ds.time.calendar)
    arr = ds[varname].values
    lat = ds.lat.values
    lon = ds.lon.values

    # Convert pr units from kg m-2 s-1 to mm/day
    if ds[varname].units == 'kg m-2 s-1':
        arr *= 86400
    # Convert tas units from K to degC
    elif ds[varname].units == 'K':
        arr -= 273.15
    return arr, lat, lon, dates
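For example, to pull monthly precipitation over an Australian domain for 1981-2010 (the file pattern, variable, and bounds are placeholders, and the module-level xarray/netCDF4 imports of the original are assumed); note that extent is ordered (north, east, south, west) and period holds datetime constructor arguments:

arr, lat, lon, dates = load_data("pr_Amon_model_*.nc", "pr",           # hypothetical files
                                 extent=(-10, 155, -45, 110),          # n, e, s, w
                                 period=((1981, 1, 1), (2010, 12, 31)))
print(arr.shape, dates[0], dates[-1])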
def load_variable(var_name, path_to_file, squeeze=False,
                  fix_times=True, **extr_kwargs):
    """ Interface for loading an extracted variable into memory, using
    either iris or xarray. If `path_to_file` is instead a raw dataset,
    then the entire contents of the file will be loaded!

    Parameters
    ----------
    var_name : string
        The name of the variable to load
    path_to_file : string
        Location of file containing variable
    squeeze : bool
        Load only the requested field (ignore all others) and
        associated dims
    fix_times : bool
        Correct the timestamps to the middle of the bounds
        in the variable metadata (CESM puts them at the right
        boundary which sucks!)
    extr_kwargs : dict
        Additional keyword arguments to pass to the extractor

    """
    logger.info("Loading %s from %s" % (var_name, path_to_file))

    ds = xr.open_dataset(path_to_file, decode_cf=False, **extr_kwargs)

    # TODO: Revise this logic as part of generalizing time post-processing.
    # Fix time unit, if necessary
    # interval, timestamp = ds.time.units.split(" since ")
    # timestamp = timestamp.split(" ")
    # yr, mm, dy = timestamp[0].split("-")
    #
    # if int(yr) < 1650:
    #     yr = 2001
    # yr = str(yr)
    #
    # # Re-construct at Jan 01, 2001 and re-set
    # timestamp[0] = "-".join([yr, mm, dy])
    # new_units = " ".join([interval, "since"] + timestamp)
    # ds.time.attrs['units'] = new_units

    # TODO: Generalize time post-processing.
    # if fix_times:
    #     assert hasattr(ds, 'time_bnds')
    #     bnds = ds.time_bnds.values
    #     mean_times = np.mean(bnds, axis=1)
    #
    #     ds.time.values = mean_times

    # Be pedantic and check that we don't have a "missing_value" attr
    for field in ds:
        if hasattr(ds[field], 'missing_value'):
            del ds[field].attrs['missing_value']

    # Lazy decode CF
    # TODO: There's potentially a bug where decode_cf eagerly loads dask arrays
    # ds = xr.decode_cf(ds)

    return ds
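A usage sketch under the assumption of a CESM-style history file; the path and variable name are placeholders, and the module-level logger and xarray import of the original are assumed:

ds = load_variable("TS", "case.cam.h0.nc")   # hypothetical CESM history file
print(ds["TS"].dims, ds["TS"].attrs.get("units"))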