def _visitfunc(self, name, node):
level = len(name.split('/'))
indent = ' '*4*(level-1)
#indent = '<span style="color:blue;">'.format(level*4)
localname = name.split('/')[-1]
#search_text = self.settings['search_text'].lower()
search_text = self.search_text
if search_text and (search_text in localname.lower()):
localname = """<span style="color: red;">{}</span>""".format(localname)
if isinstance(node, h5py.Group):
self.tree_str += indent +"|> <b>{}/</b><br/>".format(localname)
elif isinstance(node, h5py.Dataset):
self.tree_str += indent +"|D <b>{}</b>: {} {}<br/>".format(localname, node.shape, node.dtype)
for key, val in node.attrs.items():
if search_text:
if search_text in str(key).lower():
key = """<span style="color: red;">{}</span>""".format(key)
if search_text in str(val).lower():
val = """<span style="color: red;">{}</span>""".format(val)
self.tree_str += indent+" |- <i>{}</i> = {}<br/>".format(key, val)
def get_all_hdf_dataset(hdf, filter_func=None, path='/'):
    from queue import Queue  # local import so the snippet stays self-contained
    res = []
    # init FIFO queue for a breadth-first traversal
    q = Queue()
for i in hdf[path].keys():
q.put(i)
# get list of all file
while not q.empty():
        p = q.get()  # Queue provides get(), not pop()
if 'Dataset' in str(type(hdf[p])):
            if filter_func is not None and not filter_func(p):
continue
res.append(p)
elif 'Group' in str(type(hdf[p])):
for i in hdf[p].keys():
q.put(p + '/' + i)
return res
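# Usage sketch (added for illustration, not part of the original snippet):
# breadth-first listing of all dataset paths in a file, mirroring
# get_all_hdf_dataset above but using isinstance checks instead of matching on
# str(type(...)). The file name "example.h5" is hypothetical.
import queue

import h5py

def list_dataset_paths(hdf, path='/'):
    paths = []
    pending = queue.Queue()
    for key in hdf[path].keys():
        pending.put(key)
    while not pending.empty():
        current = pending.get()
        if isinstance(hdf[current], h5py.Dataset):
            paths.append(current)
        elif isinstance(hdf[current], h5py.Group):
            for key in hdf[current].keys():
                pending.put(current + '/' + key)
    return paths

with h5py.File("example.h5", "r") as hdf_file:
    print(list_dataset_paths(hdf_file))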
def from_hdf5(fpath, dataset="data"):
"""Loading data from hdf5 files that was stored by \
:meth:`~wradlib.io.to_hdf5`
Parameters
----------
fpath : string
path to the hdf5 file
dataset : string
name of the Dataset in which the data is stored
"""
f = h5py.File(fpath, mode="r")
# Check whether Dataset exists
if dataset not in f.keys():
print("Cannot read Dataset <%s> from hdf5 file <%s>" % (dataset, f))
f.close()
sys.exit()
data = np.array(f[dataset][:])
# get metadata
metadata = {}
for key in f[dataset].attrs.keys():
metadata[key] = f[dataset].attrs[key]
f.close()
return data, metadata
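# Round-trip sketch (added for illustration): write an array plus attributes
# with plain h5py, then read it back with from_hdf5 as defined above. The file
# name "rainrates.h5" and the "units" attribute are hypothetical.
import h5py
import numpy as np

example = np.random.rand(360, 128)
with h5py.File("rainrates.h5", "w") as f:
    dset = f.create_dataset("data", data=example)
    dset.attrs["units"] = "mm/h"

values, metadata = from_hdf5("rainrates.h5", dataset="data")
print(values.shape, metadata["units"])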
def setup_openpmd_species_record( self, grp, quantity ) :
"""
Set the attributes that are specific to a species record
    Parameters
    ----------
    grp : an h5py.Group object or h5py.Dataset
        The group that corresponds to `quantity`
        (in particular, its path must end with "/<quantity>")
quantity : string
The name of the record being setup
e.g. "position", "momentum"
"""
# Generic setup
self.setup_openpmd_record( grp, quantity )
# Weighting information
grp.attrs["macroWeighted"] = macro_weighted_dict[quantity]
grp.attrs["weightingPower"] = weighting_power_dict[quantity]
def read_h5(fn):
"""Read h5 file into dict.
Dict keys are the group + dataset names, e.g. '/a/b/c/dset'. All keys start
with a leading slash even if written without (see :func:`write_h5`).
Parameters
----------
fn : str
filename
Examples
--------
>>> read_h5('foo.h5').keys()
['/a/b/d1', '/a/b/d2', '/a/c/d3', '/x/y/z']
"""
fh = h5py.File(fn, mode='r')
dct = {}
def get(name, obj, dct=dct):
if isinstance(obj, h5py.Dataset):
_name = name if name.startswith('/') else '/'+name
            dct[_name] = obj[()]  # Dataset.value was removed in h5py 3.x
fh.visititems(get)
fh.close()
return dct
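# Minimal writer sketch compatible with read_h5 above (an assumption for
# illustration, not necessarily the project's own write_h5): h5py creates the
# intermediate groups automatically when a nested path is assigned.
import h5py

def write_flat_h5(fn, dct):
    with h5py.File(fn, mode='w') as fh:
        for path, arr in dct.items():
            fh[path] = arr

write_flat_h5('foo.h5', {'/a/b/d1': [1, 2, 3], '/x/y/z': [4.0, 5.0]})
print(sorted(read_h5('foo.h5').keys()))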
def _resize_stacks(self, stack_length, group_prefix="/"):
if group_prefix == "/":
log_info(logger, self._log_prefix + "Resize datasets to new length: %i" % stack_length)
if stack_length == 0:
log_warning(logger, self._log_prefix + "Cannot resize stacks to length 0. Skip resize stacks.")
return
    keys = sorted(self._f[group_prefix].keys())  # dict views have no sort() in Python 3
for k in keys:
name = group_prefix + k
if isinstance(self._f[name], h5py.Dataset):
if self._is_stack(name):
self._resize_stack(stack_length, name)
else:
self._resize_stacks(stack_length, name + "/")
self._stack_length = stack_length
def h5py_dataset_iterator(self,g, prefix=''):
for key in g.keys():
item = g[key]
path = '{}/{}'.format(prefix, key)
keys = [i for i in item.keys()]
if isinstance(item[keys[0]], h5py.Dataset): # test for dataset
data = {'path':path}
for k in keys:
if not isinstance(item[k], h5py.Group):
                    dataset = np.array(item[k][()])
if type(dataset) is np.ndarray:
if dataset.size != 0:
if type(dataset[0]) is np.bytes_:
dataset = [a.decode('ascii') for a in dataset]
data.update({k:dataset})
yield data
else: # test for group (go down)
yield from self.h5py_dataset_iterator(item, path)
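# Usage sketch (added for illustration): walk a file with the generator above
# and print each record. `walker` stands for a hypothetical instance of the
# class that defines h5py_dataset_iterator, and "scan.h5" is a hypothetical
# file whose leaf groups contain only datasets.
import h5py

with h5py.File("scan.h5", "r") as f:
    for record in walker.h5py_dataset_iterator(f):
        print(record["path"], sorted(record.keys()))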
def load_h5(filename):
'''Load data from an hdf5 file created by `save_h5`.
Parameters
----------
filename : str
Path to the hdf5 file
Returns
-------
data : dict
The key-value data stored in `filename`
See Also
--------
save_h5
'''
data = {}
def collect(k, v):
if isinstance(v, h5py.Dataset):
            data[k] = v[()]
with h5py.File(filename, mode='r') as hf:
hf.visititems(collect)
return data
def _populate_data(self, ret_dict, obj, name):
"""Read data recursively from an HDF5 value and add it to `ret_dict`.
If `obj` is a dataset, it is added to `ret_dict`. If `obj` is a group,
a sub-dictionary is created in `ret_dict` for `obj` and populated
recursively by calling this function on all of the items in the `obj`
group.
Parameters
----------
ret_dict : OrderedDict
Dictionary to which metadata will be added.
obj : h5py.Dataset | h5py.Group
HDF5 value from which to read metadata.
name : valid dictionary key
Dictionary key in `ret_dict` under which to store the data from
`obj`.
"""
if isinstance(obj, h5py.Dataset):
# [()] casts a Dataset as a numpy array
ret_dict[name] = obj[()]
else:
# create a dictionary for this group
ret_dict[name] = {}
for key, value in obj.items():
self._populate_data(ret_dict[name], value, key)
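# Standalone sketch of the same recursion (added for illustration): turn an
# entire HDF5 file into nested OrderedDicts, using [()] to materialize each
# dataset. The file name "metadata.h5" is hypothetical.
from collections import OrderedDict

import h5py

def h5_to_dict(group):
    out = OrderedDict()
    for key, obj in group.items():
        if isinstance(obj, h5py.Dataset):
            out[key] = obj[()]            # read the full dataset into memory
        else:
            out[key] = h5_to_dict(obj)    # recurse into sub-groups
    return out

with h5py.File("metadata.h5", "r") as f:
    tree = h5_to_dict(f)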
def slice_array(X, start=None, stop=None):
if type(X) == list:
if hasattr(start, '__len__'):
return [x[start] for x in X]
else:
return [x[start:stop] for x in X]
if H5PY_SUPPORTED:
if type(X) == h5py.Dataset:
return [X[i] for i in start]
if hasattr(start, '__len__'):
return X[start]
else:
return X[start:stop]
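# Usage sketch (added for illustration): the same call covers contiguous
# slices, per-array slicing of a list, and fancy indexing with an index list.
# H5PY_SUPPORTED is the module-level flag slice_array expects; it is set here
# only so the sketch runs on plain numpy arrays.
import numpy as np

H5PY_SUPPORTED = False

x = np.arange(10)
print(slice_array(x, 2, 5))           # -> array([2, 3, 4])
print(slice_array([x, x * 2], 2, 5))  # slice applied to each array in the list
print(slice_array(x, [0, 3, 7]))      # index list -> fancy indexing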
def Dataset(self):
if self._Dataset is None:
try:
from netCDF4 import Dataset
except ImportError:
Dataset = NotAModule(self._name)
self._Dataset = Dataset
return self._Dataset
def Dataset(self):
if self._err:
raise self._err
if self._Dataset is None:
try:
from h5py import Dataset
except ImportError:
Dataset = NotAModule(self._name)
self._Dataset = Dataset
return self._Dataset
def __traverse_add(self, item, filename):
if isinstance(item, h5py.Dataset):
self.add_dataset(item, filename + item.name)
elif isinstance(item, h5py.Group):
for k in item:
self.__traverse_add(item[k], filename)
else:
print("Skipping " + item.name)
def _convert_to_np_dtype(dset):
"""
Given an HDF5 dataset, return the values in a numpy-builtin datatype
Parameters
----------
dset : h5py.Dataset
HDF5 (h5py) dataset
Returns
-------
out : numpy.ndarray (dtype = numpy built-in)
Note
----
The software accounts for big-/little-endianness, and the inability of \
hdf5 to natively store complex numbers.
"""
assert isinstance(dset, _h5py.Dataset), 'Input is not of type h5py.Dataset'
# Single datatype
if len(dset.dtype) == 0:
converted = _np.ndarray(dset.shape, dtype = dset.dtype.newbyteorder('='))
dset.read_direct(converted)
if issubclass(converted.dtype.type, _np.integer): # Integer to float
            converted = converted.astype(_np.float64)  # np.float alias removed in NumPy 1.24
return converted
#Compound datatype of length 2-- assumed ('Re','Im')
elif len(dset.dtype) == 2:
print('Warning: h5py.complex_names set incorrectly using \'{}\' and \'{}\' \
for Re and Im, respectively'.format(dset.dtype.names[0], dset.dtype.names[1]))
_h5py.get_config().complex_names = (dset.dtype.names[0],dset.dtype.names[1])
dset = dset.file[dset.name]
converted = _np.ndarray(dset.shape, dtype = dset.dtype.newbyteorder('='))
dset.read_direct(converted)
# Unknown datatype
else:
print('Warning: Unknown datatype. Returning dataset values as is.')
        return dset[()]
return converted
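# Usage sketch for the single-datatype branch (added for illustration): a
# big-endian integer dataset comes back as a float array in native byte order.
# The file name "raw.h5" is hypothetical.
import h5py as _h5py
import numpy as _np

with _h5py.File("raw.h5", "w") as f:
    f.create_dataset("counts", data=_np.arange(6, dtype='>i4').reshape(2, 3))
    out = _convert_to_np_dtype(f["counts"])

print(out.dtype)  # float64, native byte order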
def check_serialize(self, data):
ret = self.serializer('w', data)
dset = self.hdf5file['w']
self.assertIsInstance(dset, h5py.Dataset)
self.assertEqual(dset.shape, data.shape)
self.assertEqual(dset.size, data.size)
self.assertEqual(dset.dtype, data.dtype)
read = numpy.empty((2, 3), dtype=numpy.float32)
dset.read_direct(read)
numpy.testing.assert_array_equal(read, cuda.to_cpu(data))
self.assertEqual(dset.compression_opts, 3)
self.assertIs(ret, data)
def test_serialize_scalar(self):
ret = self.serializer('x', 10)
dset = self.hdf5file['x']
self.assertIsInstance(dset, h5py.Dataset)
self.assertEqual(dset.shape, ())
self.assertEqual(dset.size, 1)
self.assertEqual(dset.dtype, int)
read = numpy.empty((), dtype=numpy.int32)
dset.read_direct(read)
self.assertEqual(read, 10)
self.assertEqual(dset.compression_opts, None)
self.assertIs(ret, 10)
def loadDataHDF5(data):
    if isinstance(data, h5py.File) or isinstance(data, h5py.Group):
        return {k: loadDataHDF5(v) for k, v in data.items()}
    elif isinstance(data, h5py.Dataset):
        return data[()]
    else:
        print('unhandled datatype: %s' % type(data))
def _visitfunc(self, name, node):
level = len(name.split('/'))
indent = ' '*level
localname = name.split('/')[-1]
if isinstance(node, h5py.Group):
self.tree_str += indent +"|> {}\n".format(localname)
elif isinstance(node, h5py.Dataset):
self.tree_str += indent +"|D {}: {} {}\n".format(localname, node.shape, node.dtype)
for key, val in node.attrs.items():
self.tree_str += indent+" |- {} = {}\n".format(key, val)
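# Standalone sketch of the same visitor pattern (added for illustration):
# build a plain-text tree of an HDF5 file via visititems. The file name is
# hypothetical; attributes are listed for groups and datasets alike here.
import h5py

def print_tree(path):
    lines = []
    def visit(name, node):
        depth = len(name.split('/'))
        indent = ' ' * depth
        local = name.split('/')[-1]
        if isinstance(node, h5py.Group):
            lines.append("{}|> {}".format(indent, local))
        elif isinstance(node, h5py.Dataset):
            lines.append("{}|D {}: {} {}".format(indent, local, node.shape, node.dtype))
        for key, val in node.attrs.items():
            lines.append("{} |- {} = {}".format(indent, key, val))
    with h5py.File(path, "r") as f:
        f.visititems(visit)
    print('\n'.join(lines))

# print_tree("example.h5")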
def __init__(self, dataset, hdf=None, dtype=None, shape=None):
super(Hdf5Data, self).__init__()
raise Exception("Hdf5Data is under-maintanance!")
# default chunks size is 32 (reduce complexity of the works)
self._chunk_size = 32
if isinstance(hdf, str):
hdf = open_hdf5(hdf)
if hdf is None and not isinstance(dataset, h5py.Dataset):
raise ValueError('Cannot initialize dataset without hdf file')
if isinstance(dataset, h5py.Dataset):
self._data = dataset
self._hdf = dataset.file
else:
if dataset not in hdf: # not created dataset
if dtype is None or shape is None:
raise ValueError('dtype and shape must be specified if '
'dataset has not created in hdf5 file.')
shape = tuple([0 if i is None else i for i in shape])
hdf.create_dataset(dataset, dtype=dtype,
chunks=_get_chunk_size(shape, self._chunk_size),
shape=shape, maxshape=(None, ) + shape[1:])
self._data = hdf[dataset]
        if shape is not None and self._data.shape[1:] != shape[1:]:
            raise ValueError('Shape mismatch between predefined dataset '
                             'and given shape, {} != {}'
                             ''.format(shape, self._data.shape))
self._hdf = hdf
# ==================== properties ==================== #
def collect_metadata(self, name, obj):
if isinstance(obj, h5py.Dataset):
self.file_content[name] = obj
self.file_content[name + "/dtype"] = obj.dtype
self.file_content[name + "/shape"] = obj.shape
self._collect_attrs(name, obj.attrs)
def __getitem__(self, key):
val = self.file_content[key]
if isinstance(val, h5py.Dataset):
# these datasets are closed and inaccessible when the file is closed, need to reopen
        return h5py.File(self.filename, 'r')[key][()]
return val
def setup_openpmd_species_component( self, grp, quantity ) :
"""
Set the attributes that are specific to a species component
    Parameters
    ----------
grp : an h5py.Group object or h5py.Dataset
quantity : string
The name of the component
"""
self.setup_openpmd_component( grp )
def setup_openpmd_record( self, dset, quantity ) :
"""
Sets the attributes of a record, that comply with OpenPMD
    Parameters
    ----------
dset : an h5py.Dataset or h5py.Group object
quantity : string
The name of the record considered
"""
dset.attrs["unitDimension"] = unit_dimension_dict[quantity]
# No time offset (approximation)
dset.attrs["timeOffset"] = 0.
def setup_openpmd_component( self, dset ) :
"""
Sets the attributes of a component, that comply with OpenPMD
Parameter
---------
dset : an h5py.Dataset or h5py.Group object
"""
dset.attrs["unitSI"] = 1.
def reconstruct(particles, events=-1,
config='delphes_card_ATLAS_NoFastJet.tcl',
objects='Calorimeter/towers',
random_state=0):
if not os.path.exists(config):
internal_config = os.path.join(
os.environ.get('DEEPJETS_DIR'),
'config', 'delphes', config)
if not os.path.isabs(config) and os.path.exists(internal_config):
log.warning("{0} does not exist but using internal "
"config with the same name instead: {1}".format(
config, internal_config))
config = internal_config
else:
raise IOError("Delphes config not found: {0}".format(config))
delphes = DelphesWrapper(config, random_state, objects)
kwargs = dict()
if isinstance(particles, MCInput):
reco_func = reconstruct_mc
elif isinstance(particles, h5.Dataset):
reco_func = reconstruct_hdf5
else:
reco_func = reconstruct_iterable
kwargs['events'] = events
if not inspect.isgenerator(particles) and not isinstance(particles, list):
# handle case where input is just one event
particles = [particles]
for event in reco_func(delphes, particles, **kwargs):
yield event
def __getitem__(self, key):
h5py_item = self.h5py_group[key]
if isinstance(h5py_item, h5py.Group):
if 'h5sparse_format' in h5py_item.attrs:
# detect the sparse matrix
return Dataset(h5py_item)
else:
return Group(h5py_item)
elif isinstance(h5py_item, h5py.Dataset):
return h5py_item
else:
raise ValueError("Unexpected item type.")
def create_dataset(self, name, shape=None, dtype=None, data=None,
format='csr', indptr_dtype=np.int64, indices_dtype=np.int32,
**kwargs):
"""Create 4 datasets in a group to represent the sparse array."""
if data is None:
raise NotImplementedError("Only support create_dataset with "
"existed data.")
elif isinstance(data, Dataset):
group = self.h5py_group.create_group(name)
group.attrs['h5sparse_format'] = data.h5py_group.attrs['h5sparse_format']
group.attrs['h5sparse_shape'] = data.h5py_group.attrs['h5sparse_shape']
group.create_dataset('data', data=data.h5py_group['data'],
dtype=dtype, **kwargs)
group.create_dataset('indices', data=data.h5py_group['indices'],
dtype=indices_dtype, **kwargs)
group.create_dataset('indptr', data=data.h5py_group['indptr'],
dtype=indptr_dtype, **kwargs)
else:
group = self.h5py_group.create_group(name)
group.attrs['h5sparse_format'] = get_format_str(data)
group.attrs['h5sparse_shape'] = data.shape
group.create_dataset('data', data=data.data, dtype=dtype, **kwargs)
group.create_dataset('indices', data=data.indices,
dtype=indices_dtype, **kwargs)
group.create_dataset('indptr', data=data.indptr,
dtype=indptr_dtype, **kwargs)
return Dataset(group)
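# Sketch of the on-disk layout produced by create_dataset above, written with
# plain h5py for clarity (added for illustration): one group holding 'data',
# 'indices' and 'indptr' plus two descriptive attributes. File and group names
# are hypothetical.
import h5py
import numpy as np
import scipy.sparse as ss

matrix = ss.csr_matrix(np.eye(4))
with h5py.File("sparse.h5", "w") as f:
    grp = f.create_group("identity")
    grp.attrs['h5sparse_format'] = 'csr'
    grp.attrs['h5sparse_shape'] = matrix.shape
    grp.create_dataset('data', data=matrix.data)
    grp.create_dataset('indices', data=matrix.indices)
    grp.create_dataset('indptr', data=matrix.indptr)

    # reconstruct the matrix from the three arrays
    restored = ss.csr_matrix(
        (grp['data'][()], grp['indices'][()], grp['indptr'][()]),
        shape=tuple(grp.attrs['h5sparse_shape']))
print(restored.toarray())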
def initialize_from(self, filename, ob_stat=None):
"""
Initializes weights from another policy, which must have the same architecture (variable names),
but the weight arrays can be smaller than the current policy.
"""
with h5py.File(filename, 'r') as f:
f_var_names = []
f.visititems(lambda name, obj: f_var_names.append(name) if isinstance(obj, h5py.Dataset) else None)
assert set(v.name for v in self.all_variables) == set(f_var_names), 'Variable names do not match'
init_vals = []
for v in self.all_variables:
shp = v.get_shape().as_list()
f_shp = f[v.name].shape
assert len(shp) == len(f_shp) and all(a >= b for a, b in zip(shp, f_shp)), \
'This policy must have more weights than the policy to load'
init_val = v.eval()
# ob_mean and ob_std are initialized with nan, so set them manually
if 'ob_mean' in v.name:
init_val[:] = 0
init_mean = init_val
elif 'ob_std' in v.name:
init_val[:] = 0.001
init_std = init_val
# Fill in subarray from the loaded policy
init_val[tuple([np.s_[:s] for s in f_shp])] = f[v.name]
init_vals.append(init_val)
self.set_all_vars(*init_vals)
if ob_stat is not None:
ob_stat.set_from_init(init_mean, init_std, init_count=1e5)
def _write_by_chunk(dset, arrs):
# Note: arrs should be a generator for performance reasons.
assert isinstance(dset, Dataset)
    # Start writing the data.
offset = 0
for arr in arrs:
n = arr.shape[0]
arr = arr[...]
# Match the shape of the chunk array with the dset shape.
assert arr.shape == (n,) + dset.shape[1:]
dset[offset:offset + n, ...] = arr
offset += arr.shape[0]
# Check that the copy is complete.
assert offset == dset.shape[0]
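# Usage sketch (added for illustration): pre-create a dataset with the final
# shape and stream chunk arrays into it from a generator via the helper above.
# File and dataset names are hypothetical, and the helper's `Dataset` name is
# assumed to refer to h5py.Dataset.
import h5py
import numpy as np

chunks = (np.random.rand(100, 8) for _ in range(5))
with h5py.File("features.h5", "w") as f:
    dset = f.create_dataset("X", shape=(500, 8), dtype="float64")
    _write_by_chunk(dset, chunks)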
def datasets(self, path='/'):
"""Return the list of datasets under a given node."""
return [key for key in self.children(path)
if isinstance(self._h5py_file[path + '/' + key],
h5py.Dataset)]
# Miscellaneous properties
#--------------------------------------------------------------------------