import os

import h5py
import numpy as np


def write_data_frame(fn, df):
''' Write the pandas dataframe object to an HDF5 file. Each column is written as a single 1D dataset at the top
level of the HDF5 file, using the native pandas datatype'''
# Always write a fresh file -- the 'w' argument to h5py.File is supposed to truncate an existing file, but it doesn't appear to work correctly
if os.path.exists(fn):
os.remove(fn)
f = h5py.File(fn, "w")
# To preserve column order, write columns to an attribute
column_names = np.array(list(df.columns))
f.attrs.create("column_names", column_names)
for col in df.columns:
write_data_column(f, df[col])
f.close()
def append_data_frame(fn, df):
    ''' Append the pandas dataframe object to an existing HDF5 file, extending each
    top-level 1D column dataset in place. Creates the file if it does not exist. '''
if not os.path.exists(fn):
write_data_frame(fn, df)
return
f = h5py.File(fn, "a")
column_names = f.attrs.get("column_names")
for col_name in column_names:
ds = f[col_name]
col = df[col_name]
append_data_column(ds, col)
f.close()
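
# write_data_column / append_data_column are referenced above but not shown.
# A minimal sketch of what they would need to do, assuming each column is a
# plain resizable 1D dataset named after the column (hypothetical helpers,
# not the original implementation):
def write_data_column(f, col):
    # maxshape=(None,) makes the dataset extendable for later appends
    f.create_dataset(col.name, data=col.values, maxshape=(None,))

def append_data_column(ds, col):
    n_old, n_new = ds.shape[0], len(col)
    ds.resize((n_old + n_new,))
    ds[n_old:] = col.values
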
def compile(self, root_block_like):
"""Compiles a block, and sets it to the root.
Args:
root_block_like: A block or an object that can be converted to a block by
[`td.convert_to_block`](#td.convert_to_block). Must have at least one
output or metric tensor. The output type may not contain any
Sequence or PyObject types.
Returns:
`self`
Raises:
RuntimeError: If `init_loom()` has already been called.
TypeError: If `root_block_like` cannot be converted to a block.
TypeError: If `root_block_like` fails to compile.
TypeError: If `root_block_like` has no output or metric tensors.
TypeError: If `root_block_like` has an invalid output type.
"""
if self.is_loom_initialized:
raise RuntimeError('Loom has already been initialized.')
return self._setup(root_block_like, interactive_mode=False)
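
# Hypothetical usage sketch for compile() above (TensorFlow Fold). The block
# construction below (`td.Scalar`, `td.Function`) is illustrative, not taken
# from the surrounding code:
import tensorflow_fold as td

compiler = td.Compiler()
root_block = td.Scalar() >> td.Function(lambda x: x * 2)
compiler.compile(root_block)
compiler.init_loom()  # after this, compile() would raise RuntimeError
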
def test_wrap(self):
class with_wrap(object):
def __array__(self):
return np.zeros(1)
def __array_wrap__(self, arr, context):
r = with_wrap()
r.arr = arr
r.context = context
return r
a = with_wrap()
x = ncu.minimum(a, a)
assert_equal(x.arr, np.zeros(1))
func, args, i = x.context
self.assertTrue(func is ncu.minimum)
self.assertEqual(len(args), 2)
assert_equal(args[0], a)
assert_equal(args[1], a)
self.assertEqual(i, 0)
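
# A standalone sketch of the legacy __array_wrap__ protocol exercised by
# test_wrap above: the ufunc result and a (ufunc, args, output_index) context
# tuple are handed back so the class can re-wrap the output. Illustrative
# only, under the old-NumPy semantics this test targets:
import numpy as np

class Tagged(object):
    def __array__(self):
        return np.arange(3.0)
    def __array_wrap__(self, arr, context=None):
        print('wrapped output of', context[0].__name__)  # e.g. 'add'
        return arr

np.add(Tagged(), 1.0)  # prints 'wrapped output of add'; result is array([1., 2., 3.])
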
def test_dot_override(self):
# 2016-01-29: NUMPY_UFUNC_DISABLED
return
class A(object):
def __numpy_ufunc__(self, ufunc, method, pos, inputs, **kwargs):
return "A"
class B(object):
def __numpy_ufunc__(self, ufunc, method, pos, inputs, **kwargs):
return NotImplemented
a = A()
b = B()
c = np.array([[1]])
assert_equal(np.dot(a, b), "A")
assert_equal(c.dot(a), "A")
assert_raises(TypeError, np.dot, b, c)
assert_raises(TypeError, c.dot, b)
def test_ufunc_override_normalize_signature(self):
# 2016-01-29: NUMPY_UFUNC_DISABLED
return
# gh-5674
class SomeClass(object):
def __numpy_ufunc__(self, ufunc, method, i, inputs, **kw):
return kw
a = SomeClass()
kw = np.add(a, [1])
assert_('sig' not in kw and 'signature' not in kw)
kw = np.add(a, [1], sig='ii->i')
assert_('sig' not in kw and 'signature' in kw)
assert_equal(kw['signature'], 'ii->i')
kw = np.add(a, [1], signature='ii->i')
assert_('sig' not in kw and 'signature' in kw)
assert_equal(kw['signature'], 'ii->i')
def test_object_logical(self):
a = np.array([3, None, True, False, "test", ""], dtype=object)
assert_equal(np.logical_or(a, None),
np.array([x or None for x in a], dtype=object))
assert_equal(np.logical_or(a, True),
np.array([x or True for x in a], dtype=object))
assert_equal(np.logical_or(a, 12),
np.array([x or 12 for x in a], dtype=object))
assert_equal(np.logical_or(a, "blah"),
np.array([x or "blah" for x in a], dtype=object))
assert_equal(np.logical_and(a, None),
np.array([x and None for x in a], dtype=object))
assert_equal(np.logical_and(a, True),
np.array([x and True for x in a], dtype=object))
assert_equal(np.logical_and(a, 12),
np.array([x and 12 for x in a], dtype=object))
assert_equal(np.logical_and(a, "blah"),
np.array([x and "blah" for x in a], dtype=object))
assert_equal(np.logical_not(a),
np.array([not x for x in a], dtype=object))
assert_equal(np.logical_or.reduce(a), 3)
assert_equal(np.logical_and.reduce(a), None)
def test_dtype_with_object(self):
# Test using an explicit dtype with an object
data = """ 1; 2001-01-01
2; 2002-01-31 """
    ndtype = [('idx', int), ('code', object)]
func = lambda s: strptime(s.strip(), "%Y-%m-%d")
converters = {1: func}
test = np.genfromtxt(TextIO(data), delimiter=";", dtype=ndtype,
converters=converters)
control = np.array(
[(1, datetime(2001, 1, 1)), (2, datetime(2002, 1, 31))],
dtype=ndtype)
assert_equal(test, control)
    ndtype = [('nest', [('idx', int), ('code', object)])]
try:
test = np.genfromtxt(TextIO(data), delimiter=";",
dtype=ndtype, converters=converters)
except NotImplementedError:
pass
else:
errmsg = "Nested dtype involving objects should be supported."
raise AssertionError(errmsg)
def test_gft_using_filename(self):
# Test that we can load data from a filename as well as a file
# object
tgt = np.arange(6).reshape((2, 3))
if sys.version_info[0] >= 3:
# python 3k is known to fail for '\r'
linesep = ('\n', '\r\n')
else:
linesep = ('\n', '\r\n', '\r')
for sep in linesep:
data = '0 1 2' + sep + '3 4 5'
with temppath() as name:
with open(name, 'w') as f:
f.write(data)
res = np.genfromtxt(name)
assert_array_equal(res, tgt)
def test_generic_rank3(self):
"""Test rank 3 array for all dtypes."""
def foo(t):
a = np.empty((4, 2, 3), t)
a.fill(1)
b = a.copy()
c = a.copy()
c.fill(0)
self._test_equal(a, b)
self._test_not_equal(c, b)
# Test numeric types and object
for t in '?bhilqpBHILQPfdgFDG':
foo(t)
# Test strings
for t in ['S1', 'U1']:
foo(t)
def test_TakeTransposeInnerOuter(self):
# Test of take, transpose, inner, outer products
x = arange(24)
y = np.arange(24)
x[5:6] = masked
x = x.reshape(2, 3, 4)
y = y.reshape(2, 3, 4)
assert_equal(np.transpose(y, (2, 0, 1)), transpose(x, (2, 0, 1)))
assert_equal(np.take(y, (2, 0, 1), 1), take(x, (2, 0, 1), 1))
assert_equal(np.inner(filled(x, 0), filled(y, 0)),
inner(x, y))
assert_equal(np.outer(filled(x, 0), filled(y, 0)),
outer(x, y))
y = array(['abc', 1, 'def', 2, 3], object)
y[2] = masked
t = take(y, [0, 3, 4])
assert_(t[0] == 'abc')
assert_(t[1] == 2)
assert_(t[2] == 3)
def _parase_fq_factor(code, start, end):
symbol = _code_to_symbol(code)
request = Request(ct.HIST_FQ_FACTOR_URL%(ct.P_TYPE['http'],
ct.DOMAINS['vsf'], symbol))
text = urlopen(request, timeout=10).read()
text = text[1:len(text)-1]
text = text.decode('utf-8') if ct.PY3 else text
text = text.replace('{_', '{"')
text = text.replace('total', '"total"')
text = text.replace('data', '"data"')
text = text.replace(':"', '":"')
text = text.replace('",_', '","')
text = text.replace('_', '-')
text = json.loads(text)
df = pd.DataFrame({'date':list(text['data'].keys()), 'factor':list(text['data'].values())})
df['date'] = df['date'].map(_fun_except) # for null case
    if df['date'].dtypes == object:
df['date'] = df['date'].astype(np.datetime64)
df = df.drop_duplicates('date')
df['factor'] = df['factor'].astype(float)
return df
def least_square_lagged_regression(u_array):
"""
u_array, q, T+1, p
"""
q,T,p = u_array.shape
T -= 1
# t0, t1 term is t1 regressed on t0
lagged_coef_mat = np.zeros([T,T],dtype = np.object)
for t0 in range(T):
for t1 in range(t0,T):
tmp_coef = np.zeros([p,p])
for i in range(p):
# least square regression u_t+h[i] u_t
tmp_y = u_array[:,t1+1,i]
tmp_x = u_array[:,t0,:]
# (X'X)^{-1} X' Y
tmp_coef[i,:] = np.linalg.inv(tmp_x.T.dot(tmp_x)).dot(tmp_x.T.dot(tmp_y))
lagged_coef_mat[t0,t1] = tmp_coef
return lagged_coef_mat
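
# Usage sketch on synthetic data (sizes are illustrative): 50 replicates of a
# 3-dimensional series at 11 time points yield a (10, 10) object array whose
# filled entries are (3, 3) coefficient matrices.
import numpy as np

rng = np.random.default_rng(0)
u = rng.normal(size=(50, 11, 3))  # (q, T+1, p)
coefs = least_square_lagged_regression(u)
print(coefs.shape)        # (10, 10); only entries with t1 >= t0 are populated
print(coefs[0, 0].shape)  # (3, 3)
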
def redraw(self):
column_index1 = self.combo_box1.GetSelection()
if column_index1 != wx.NOT_FOUND and column_index1 != 0:
# subtract one to remove the neutral selection index
column_index1 -= 1
df = self.df_list_ctrl.get_filtered_df()
if len(df) > 0:
self.axes.clear()
column = df.iloc[:, column_index1]
            is_string_col = column.dtype == object and isinstance(column.values[0], str)
if is_string_col:
value_counts = column.value_counts().sort_index()
value_counts.plot(kind='bar', ax=self.axes)
else:
self.axes.hist(column.values, bins=100)
self.canvas.draw()
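
# The dtype check in redraw() is the usual pandas heuristic for "column of
# Python strings"; a small standalone illustration (sketch):
import pandas as pd

col = pd.Series(['a', 'b', 'a'])
print(col.dtype == object and isinstance(col.values[0], str))  # True
print(col.value_counts().sort_index())  # a: 2, b: 1
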
def Leaflet_finder(block, traj, cutoff, len_atom, len_chunks, block_id=None):
id_0 = block_id[0]
id_1 = block_id[1]
    # boolean adjacency block: pairwise distances between the two chunks
    block[:, :] = cdist(np.load(traj, mmap_mode='r')[id_0*len_chunks:(id_0+1)*len_chunks],
                        np.load(traj, mmap_mode='r')[id_1*len_chunks:(id_1+1)*len_chunks]) <= cutoff
    adj_list = np.vstack(np.nonzero(block))
    adj_list[0] = adj_list[0] + id_0*len_chunks
    adj_list[1] = adj_list[1] + id_1*len_chunks
    if adj_list.shape[1] == 0:
        adj_list = np.zeros((2, 1))
    graph = nx.Graph()
    edges = [(adj_list[0, k], adj_list[1, k]) for k in range(adj_list.shape[1])]
    graph.add_edges_from(edges)
    l = np.array({i: item for i, item in enumerate(sorted(nx.connected_components(graph)))},
                 dtype=object).reshape(1, 1)
return l
def get_samples(desired_data):
all_samples = []
for data in desired_data:
temperatures = np.atleast_1d(data['conditions']['T'])
        num_configs = np.array(data['solver'].get('sublattice_configurations'), dtype=object).shape[0]
site_fractions = data['solver'].get('sublattice_occupancies', [[1]] * num_configs)
site_fraction_product = [reduce(operator.mul, list(itertools.chain(*[np.atleast_1d(f) for f in fracs])), 1)
for fracs in site_fractions]
# TODO: Subtle sorting bug here, if the interactions aren't already in sorted order...
interaction_product = []
for fracs in site_fractions:
interaction_product.append(float(reduce(operator.mul,
[f[0] - f[1] for f in fracs if isinstance(f, list) and len(f) == 2],
1)))
if len(interaction_product) == 0:
interaction_product = [0]
comp_features = zip(site_fraction_product, interaction_product)
all_samples.extend(list(itertools.product(temperatures, comp_features)))
return all_samples
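
# A worked instance of the two products computed above (values are assumed for
# illustration): for occupancy [[0.25, 0.75], 1.0] -- one binary sublattice and
# one pure one -- the site-fraction product multiplies every fraction, while
# the interaction product multiplies (f0 - f1) over binary sublattices only.
import itertools
import operator
from functools import reduce
import numpy as np

fracs = [[0.25, 0.75], 1.0]
sf_product = reduce(operator.mul,
                    list(itertools.chain(*[np.atleast_1d(f) for f in fracs])), 1)
interaction = float(reduce(operator.mul,
                           [f[0] - f[1] for f in fracs
                            if isinstance(f, list) and len(f) == 2], 1))
print(sf_product, interaction)  # 0.1875 -0.5
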
def _shift_reference_state(desired_data, feature_transform, fixed_model):
"""
Shift data to a new common reference state.
"""
total_response = []
for dataset in desired_data:
        values = np.asarray(dataset['values'], dtype=object)
if dataset['solver'].get('sublattice_occupancies', None) is not None:
value_idx = 0
for occupancy, config in zip(dataset['solver']['sublattice_occupancies'],
dataset['solver']['sublattice_configurations']):
                if dataset['output'].endswith('_FORM'):
                    pass  # formation data are already on the desired reference state
                elif dataset['output'].endswith('_MIX'):
                    values[..., value_idx] += feature_transform(fixed_model.models['ref'])
else:
raise ValueError('Unknown property to shift: {}'.format(dataset['output']))
value_idx += 1
total_response.append(values.flatten())
return total_response
def get_his_std( data_pixel, rois, max_cts=None):
'''
    Y.G. Dec 16, 2016
    Calculate the photon-count histogram for each of multiple q rings.
    Parameters:
        data_pixel: multi-D array of photon counts
        max_cts: histogram bin max; bins will be [0, 1, 2, ..., max_cts]
    Return:
        bins
        his
        std
        kmean
'''
if max_cts is None:
max_cts = np.max( data_pixel ) + 1
qind, pixelist = roi.extract_label_indices( rois )
noqs = len( np.unique(qind) )
    his = np.zeros([noqs], dtype=object)
    std = np.zeros_like(his, dtype=object)
    kmean = np.zeros_like(his, dtype=object)
for qi in range(noqs):
pixelist_qi = np.where( qind == qi+1)[0]
#print(qi, max_cts)
bins, his[qi], std[qi], kmean[qi] = get_his_std_qi( data_pixel[:,pixelist_qi] , max_cts)
return bins, his, std, kmean
def get_his_std_from_pds( spec_pds, his_shapes=None):
'''Y.G.Dec 22, 2016
get spec_his, spec_std from a pandas.dataframe file
Parameters:
spec_pds: pandas.dataframe, contains columns as 'count',
spec_his (as 'his_level_0_q_0'), spec_std (as 'std_level_0_q_0')
    his_shapes: shape of the returned arrays; if None, (2, (len(spec_pds.keys())-1)//4) is used
Return:
spec_his: array, shape as his_shapes
spec_std, array, shape as his_shapes
'''
spkeys = list( spec_pds.keys() )
    if his_shapes is None:
        his_shapes = (2, int((len(spkeys)-1)/4))
    M, N = his_shapes
    #print(M,N)
    spec_his = np.zeros([M, N], dtype=object)
    spec_std = np.zeros([M, N], dtype=object)
for i in range(M):
for j in range(N):
spec_his[i,j] = np.array( spec_pds[ spkeys[1+ i*N + j] ][ ~np.isnan( spec_pds[ spkeys[1+ i*N + j] ] )] )
spec_std[i,j] = np.array( spec_pds[ spkeys[1+ 2*N + i*N + j]][ ~np.isnan( spec_pds[ spkeys[1+ 2*N + i*N + j]] )] )
return spec_his, spec_std
def coords_edges(self, edges):
'''
    Returns a list of head and tail coordinates for each edge in edges
'''
res = np.empty((len(edges)), dtype=object)
    for r, e in enumerate(edges):
        if e[0] is None:
            e[0] = 0
        res[r] = self.coords_edge(e)
        if len(res[r][0]) != 2:
            print('there is an error with the edges')
            import pdb
            pdb.set_trace()
# v = np.vectorize(self.coords_edge, otypes=[np.object])
# res = v(edges)
return res
def DFS(self, start, fs=None):
'''
Returns the DFS tree for the graph starting from start
'''
    to_be_processed = np.array([start], dtype=int)
    known = np.array([], dtype=int)
tree = np.array([], dtype=object)
if fs is None:
fs = self.FSs
while len(to_be_processed) > 0:
# pop
current_node = to_be_processed[0]
to_be_processed = np.delete(to_be_processed, 0)
for node in fs[current_node]:
if node not in known:
known = np.append(known, node)
tree = np.append(tree, None)
tree[-1] = (current_node, node)
# push
to_be_processed = np.insert(to_be_processed, 0, node)
return tree
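
# Hypothetical usage sketch: run DFS on a 4-node graph given as forward-star
# adjacency lists. `Graph` merely stands in for the class that owns DFS();
# this assumes DFS is visible at module level, as in this listing.
class Graph(object):
    def __init__(self, fss):
        self.FSs = fss
    DFS = DFS  # reuse the function above as a method (sketch only)

g = Graph([[1, 2], [3], [3], []])
print(g.DFS(0))  # tree edges, e.g. [(0, 1) (0, 2) (2, 3)]
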
def prim(self):
'''
    Returns Prim's minimum spanning tree
'''
big_f = set([])
costs = np.empty((self.n), dtype=object)
costs[:] = np.max(self.costs) + 1
big_e = np.empty((self.n), dtype=object)
big_q = set(range(self.n))
tree_edges = np.array([], dtype=object)
while len(big_q) > 0:
v = np.argmin(costs)
big_q.remove(v)
        costs[v] = np.inf
big_f.add(v)
if big_e[v] is not None:
tree_edges = np.append(tree_edges, None)
tree_edges[-1] = (big_e[v], v)
for i, w in zip(range(len(self.FSs[v])), self.FSs[v]):
if w in big_q and self.FS_costs[v][i] < costs[w]:
costs[w] = self.FS_costs[v][i]
big_e[w] = v
return tree_edges
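
# Hypothetical usage sketch for prim() above: a 3-node triangle in the
# forward-star layout the method reads (FSs = neighbour lists, FS_costs =
# matching edge costs). `G` merely stands in for the class that owns prim().
import numpy as np

class G(object):
    n = 3
    FSs = [[1, 2], [0, 2], [0, 1]]
    FS_costs = [[1.0, 3.0], [1.0, 2.0], [3.0, 2.0]]
    costs = np.array([1.0, 3.0, 2.0])  # prim() only uses its max as a sentinel
    prim = prim  # reuse the function above as a method (sketch only)

print(G().prim())  # e.g. [(0, 1) (1, 2)] -- the minimum spanning tree edges
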
def connect_graphs(self, sets_orig, edges_orig):
    '''
    Returns the edges needed to connect disconnected subgraphs (sets of nodes):
    the largest set is taken as the master graph, and for every other set the
    shortest edges linking it to the master graph are searched for.
    '''
master_graph = max(sets_orig, key=len)
sets = sets_orig.copy()
edges = np.array([], dtype=object)
sets.remove(master_graph)
master_tree = cKDTree(self.nodes[list(master_graph)])
for s in sets:
x = np.array(list(s))
nearests = np.array([master_tree.query(self.nodes[v]) for v in x])
tails = nearests[
nearests[:, 0].argsort()][:, 1][:self.max_neighbours]
heads = x[nearests[:, 0].argsort()][:self.max_neighbours]
for head, tail in zip(heads, tails):
edges = np.append(edges, None)
edges[-1] = (head, tail)
edges = np.append(edges, None)
edges[-1] = (tail, head)
return edges