def _index_of(arr, lookup):
"""Replace scalars in an array by their indices in a lookup table.
    Implicitly assume that:
    * All elements of arr and lookup are non-negative integers.
    * All elements of arr belong to lookup.
This is not checked for performance reasons.
"""
# Equivalent of np.digitize(arr, lookup) - 1, but much faster.
# TODO: assertions to disable in production for performance reasons.
# TODO: np.searchsorted(lookup, arr) is faster on small arrays with large
# values
lookup = np.asarray(lookup, dtype=np.int32)
m = (lookup.max() if len(lookup) else 0) + 1
    tmp = np.zeros(m + 1, dtype=np.int64)  # np.int was removed in recent NumPy
# Ensure that -1 values are kept.
tmp[-1] = -1
if len(lookup):
tmp[lookup] = np.arange(len(lookup))
return tmp[arr]
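A minimal sketch, with made-up arrays, showing that the lookup-table trick matches np.digitize(arr, lookup) - 1:

# Hypothetical demonstration data, not from the original project.
import numpy as np

lookup = np.array([2, 5, 7, 11], dtype=np.int32)   # sorted, non-negative, unique
arr = np.array([5, 2, 11, 7, 7], dtype=np.int32)   # every value appears in lookup

idx_fast = _index_of(arr, lookup)        # lookup-table version
idx_ref = np.digitize(arr, lookup) - 1   # reference implementation
assert np.array_equal(idx_fast, idx_ref)  # both give [1, 0, 3, 2, 2]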
def _windbarbs(u, v, press, delta):
#delta = 2500 # equals 25mb
p_bin_min = int((np.min(press) // delta) * delta)
p_bin_max = int(((np.max(press) // delta)+1) * delta)
p_bins = np.array(range(p_bin_min, p_bin_max, delta))
ixs = np.digitize(press, p_bins)
uwind = [np.mean(u[ixs == ix]) for ix in list(set(ixs))]
vwind = [np.mean(v[ixs == ix]) for ix in list(set(ixs))]
ax = plt.gca()
inv = ax.transLimits.inverted()
#x_pos, _none = inv.transform((0.92, 0))
x_pos = inv.transform(np.array([[0.92,0]]))[0, 0]
baraxis = [x_pos] * len(p_bins)
    # barb_increments is expected to be defined at module level in the original code.
    plt.barbs(baraxis, p_bins, uwind, vwind,
              barb_increments=barb_increments, linewidth=0.75)  # , transform=ax.transAxes
def pdf_bins_batch(bins, prob, querys):
assert (len(bins) == len(prob) + 1)
querys = np.array(querys)
bins = np.array(bins)
idx = np.digitize(querys, bins[1:-1])
# get the mass
masses = prob[idx]
if FLAGS.pdf_normalize_bins:
# get the x bin length
xlen = bins[idx + 1] - bins[idx]
return masses / xlen
else:
return masses
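A small usage sketch with invented bins and masses; it assumes the module-level FLAGS object (providing pdf_normalize_bins) is configured as in the surrounding project:

# Hypothetical example: three bins over [0, 3) with a simple histogram mass.
bins = [0.0, 1.0, 2.0, 3.0]          # len(bins) == len(prob) + 1
prob = np.array([0.2, 0.5, 0.3])     # mass per bin
querys = [0.5, 1.5, 2.9]
masses = pdf_bins_batch(bins, prob, querys)
# With FLAGS.pdf_normalize_bins == False this returns [0.2, 0.5, 0.3];
# with normalization each mass is divided by its bin width (here 1.0).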
def _plot_line(self, ax, domain, line, label, color, marker):
order = np.argsort(domain)
domain, line = domain[order], line[order]
borders = np.linspace(domain[0], domain[-1], self._resolution)
borders = np.digitize(borders, domain)
domain = np.linspace(domain[0], domain[-1], len(borders) - 1)
lower_ = aggregate(line, borders, lambda x: np.percentile(x, 10, 0)[0])
middle = aggregate(line, borders, lambda x: np.percentile(x, 50, 0)[0])
upper_ = aggregate(line, borders, lambda x: np.percentile(x, 90, 0)[0])
ax.fill_between(
domain, upper_, lower_, facecolor=color, edgecolor=color,
**self.AREA)
ax.plot(
domain, middle, c=color, label=label)
def map_ima_to_2D_hist(xinput, yinput, bins_arr):
"""Image to volume histogram mapping (kind of inverse histogram).
Parameters
----------
xinput : TODO
        First image, which is often the intensity image (e.g. T1w).
yinput : TODO
Second image, which is often the gradient magnitude image
derived from the first image.
bins_arr : TODO
Array of bins.
Returns
-------
vox2pixMap : TODO
Voxel to pixel mapping.
"""
dgtzData = np.digitize(xinput, bins_arr)-1
dgtzGra = np.digitize(yinput, bins_arr)-1
nr_bins = len(bins_arr)-1 # subtract 1 (more borders than containers)
vox2pixMap = sub2ind(nr_bins, dgtzData, dgtzGra) # 1D
return vox2pixMap
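The sub2ind helper is defined elsewhere in the project; a minimal row-major sketch consistent with how it is called here (purely an assumption, not the project's actual implementation) could be:

def sub2ind(array_shape, rows, cols):
    """Hypothetical stand-in for the project's sub2ind helper: map 2D bin
    subscripts (rows, cols) of a square histogram with `array_shape` bins
    per axis to flat, row-major 1D indices."""
    return rows * array_shape + cols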
def _node_io_loop(self, func, *args, **kwargs):
root_nodes = kwargs.pop("root_nodes", None)
if root_nodes is None:
root_nodes = self.trees
opbar = kwargs.pop("pbar", None)
ai = np.array([node._ai for node in root_nodes])
dfi = np.digitize(ai, self._ei)
udfi = np.unique(dfi)
for i in udfi:
if opbar is not None:
kwargs["pbar"] = "%s [%d/%d]" % (opbar, i+1, udfi.size)
my_nodes = root_nodes[dfi == i]
kwargs["root_nodes"] = my_nodes
kwargs["fcache"] = {}
fn = "%s_%04d%s" % (self._prefix, i, self._suffix)
f = h5py.File(fn, "r")
kwargs["f"] = f
super(YTreeArbor, self)._node_io_loop(func, *args, **kwargs)
f.close()
def test_mem_digitize(self, level=rlevel):
# Ticket #95
for i in range(100):
np.digitize([1, 2, 3, 4], [1, 3])
np.digitize([0, 1, 2, 3, 4], [1, 3])
def plot_reliability_diagram(y, x, bins=np.linspace(0, 1, 21), size_points=True,
                             show_baseline=True, ax=None, marker='+', c='red', **kwargs):
if ax is None:
ax = _gca()
fig = ax.get_figure()
digitized_x = np.digitize(x, bins)
    mean_count_array = np.array(
        [[np.mean(y[digitized_x == i]),
          len(y[digitized_x == i]),
          np.mean(x[digitized_x == i])]
         for i in np.unique(digitized_x)])
if show_baseline:
ax.plot(np.linspace(0,1,100),(np.linspace(0,1,100)),'k--')
for i in range(len(mean_count_array[:,0])):
if size_points:
plt.scatter(mean_count_array[i,2],mean_count_array[i,0],s=mean_count_array[i,1],marker=marker,c=c, **kwargs)
else:
plt.scatter(mean_count_array[i,2],mean_count_array[i,0], **kwargs)
plt.axis([-0.1,1.1,-0.1,1.1])
return(mean_count_array[:,2],mean_count_array[:,0],mean_count_array[:,1])
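A hypothetical usage sketch with synthetic predictions; ax is passed explicitly so the example does not depend on the module's _gca helper:

import numpy as np
import matplotlib.pyplot as plt

# Synthetic example: probabilistic predictions x against binary labels y drawn at rate x.
rng = np.random.RandomState(0)
x = rng.uniform(0, 1, size=2000)
y = (rng.uniform(0, 1, size=2000) < x).astype(float)

mean_x, mean_y, counts = plot_reliability_diagram(y, x, ax=plt.gca())
plt.show()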
# preprocessing.py, from the Epileptic-Seizure-Prediction project (author: cedricsimar)
def group_into_bands(self, fft, fft_freq, nfreq_bands):
"""
Group the fft result by frequency bands and take the mean
of the fft values within each band
Return a list of the frequency bands' means (except the first element
which is the frequency band 0 - 0.1Hz)
"""
freq_bands = np.digitize(fft_freq, FREQUENCIES)
df = DataFrame({'fft': fft, 'band': freq_bands})
df = df.groupby('band').mean()
return df.fft[1:-1]
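A hedged standalone sketch of the same digitize-and-groupby banding; the FREQUENCIES band edges below are placeholders, not the project's actual constant:

import numpy as np
from pandas import DataFrame

# Placeholder band edges in Hz; the project defines its own FREQUENCIES constant.
FREQUENCIES = np.array([0.1, 4.0, 8.0, 12.0, 30.0, 70.0])

fft_freq = np.linspace(0.0, 100.0, 513)   # frequency axis of a toy spectrum
fft = np.abs(np.random.randn(513))        # toy FFT magnitudes

freq_bands = np.digitize(fft_freq, FREQUENCIES)
df = DataFrame({'fft': fft, 'band': freq_bands}).groupby('band').mean()
band_means = df.fft[1:-1]                 # drop the 0-0.1 Hz and above-70 Hz bands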
def weighted_thin(weights,thin_unit):
'''
Given a weight array, perform thinning.
    If all the weights are equal, this should be equivalent to
    selecting every N/(thinfrac*N)-th (i.e. every 1/thinfrac-th) sample,
    where N = len(weights).
'''
N=len(weights)
    if thin_unit == 0:
        # Return in the same form as the weighted case below.
        return {'ix': np.arange(N), 'w': weights}
    if thin_unit < 1:
        N2 = int(N * thin_unit)
else:
N2=N//thin_unit
#bin the weight index to have the desired length
#this defines the bin edges
bins = np.linspace(-1, N, N2+1)
#this collects the indices of the weight array in each bin
ind = np.digitize(np.arange(N), bins)
#this gets the maximum weight in each bin
thin_ix=pd.Series(weights).groupby(ind).idxmax().tolist()
thin_ix=np.array(thin_ix,dtype=np.intp)
    logger.info('Thinning with weighted binning: thinfrac={}. new_nsamples={}, old_nsamples={}'.format(thin_unit, len(thin_ix), len(weights)))
return {'ix':thin_ix, 'w':weights[thin_ix]}
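A small usage sketch with invented weights; the function also relies on pandas imported as pd and a module-level logger, as in the surrounding project:

import numpy as np

weights = np.random.gamma(2.0, size=1000)        # toy importance weights
thinned = weighted_thin(weights, thin_unit=10)   # keep roughly 100 samples
# thinned['ix'] holds the index of the largest weight in each bin,
# thinned['w'] the corresponding weights.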
def calc_information_for_layer(data, bins, unique_inverse_x, unique_inverse_y, pxs, pys1):
bins = bins.astype(np.float32)
digitized = bins[np.digitize(np.squeeze(data.reshape(1, -1)), bins) - 1].reshape(len(data), -1)
b2 = np.ascontiguousarray(digitized).view(
np.dtype((np.void, digitized.dtype.itemsize * digitized.shape[1])))
unique_array, unique_inverse_t, unique_counts = \
np.unique(b2, return_index=False, return_inverse=True, return_counts=True)
p_ts = unique_counts / float(sum(unique_counts))
PXs, PYs = np.asarray(pxs).T, np.asarray(pys1).T
local_IXT, local_ITY = calc_information_from_mat(PXs, PYs, p_ts, digitized, unique_inverse_x, unique_inverse_y,
unique_array)
return local_IXT, local_ITY
def calc_all_sigams(data, sigmas):
batchs = 128
num_of_bins = 8
# bins = np.linspace(-1, 1, num_of_bins).astype(np.float32)
# bins = stats.mstats.mquantiles(np.squeeze(data.reshape(1, -1)), np.linspace(0,1, num=num_of_bins))
# data = bins[np.digitize(np.squeeze(data.reshape(1, -1)), bins) - 1].reshape(len(data), -1)
batch_points = np.rint(np.arange(0, data.shape[0] + 1, batchs)).astype(dtype=np.int32)
I_XT = []
num_of_rand = min(800, data.shape[1])
for sigma in sigmas:
# print sigma
I_XT_temp = 0
for i in range(0, len(batch_points) - 1):
new_data = data[batch_points[i]:batch_points[i + 1], :]
rand_indexs = np.random.randint(0, new_data.shape[1], num_of_rand)
new_data = new_data[:, :]
N = new_data.shape[0]
d = new_data.shape[1]
diff_mat = np.linalg.norm(((new_data[:, np.newaxis, :] - new_data)), axis=2)
# print diff_mat.shape, new_data.shape
s0 = 0.2
            # TODO: add leave-one-out validation
res = minimize(optimiaze_func, s0, args=(diff_mat, d, N), method='nelder-mead',
options={'xtol': 1e-8, 'disp': False, 'maxiter': 6})
eta = res.x
diff_mat0 = - 0.5 * (diff_mat / (sigma ** 2 + eta ** 2))
diff_mat1 = np.sum(np.exp(diff_mat0), axis=0)
diff_mat2 = -(1.0 / N) * np.sum(np.log2((1.0 / N) * diff_mat1))
I_XT_temp += diff_mat2 - d * np.log2((sigma ** 2) / (eta ** 2 + sigma ** 2))
# print diff_mat2 - d*np.log2((sigma**2)/(eta**2+sigma**2))
I_XT_temp /= len(batch_points)
I_XT.append(I_XT_temp)
sys.stdout.flush()
return I_XT
def _compute_ratemap(self, min_duration=None):
"""
min_duration is the min duration in seconds for a bin to be
considered 'valid'; if too few observations were made, then the
firing rate is kept at an estimate of 0. If min_duration == 0,
then all the spikes are used.
"""
if min_duration is None:
min_duration = self._min_duration
x, y = self.trans_func(self._extern, at=self._bst.bin_centers)
ext_bin_idx_x = np.digitize(x, self.xbins, True)
ext_bin_idx_y = np.digitize(y, self.ybins, True)
# make sure that all the events fit between extmin and extmax:
# TODO: this might rather be a warning, but it's a pretty serious warning...
if ext_bin_idx_x.max() > self.n_xbins:
raise ValueError("ext values greater than 'ext_xmax'")
if ext_bin_idx_x.min() == 0:
raise ValueError("ext values less than 'ext_xmin'")
if ext_bin_idx_y.max() > self.n_ybins:
raise ValueError("ext values greater than 'ext_ymax'")
if ext_bin_idx_y.min() == 0:
raise ValueError("ext values less than 'ext_ymin'")
ratemap = np.zeros((self.n_units, self.n_xbins, self.n_ybins))
for tt, (bidxx, bidxy) in enumerate(zip(ext_bin_idx_x, ext_bin_idx_y)):
ratemap[:,bidxx-1, bidxy-1] += self._bst.data[:,tt]
# apply minimum observation duration
for uu in range(self.n_units):
ratemap[uu][self.occupancy*self._bst.ds < min_duration] = 0
return ratemap / self._bst.ds
def create_one(self):
type_to_create = self.values[numpy.digitize(numpy.random.uniform(0, 1), self.bins)]
return self.create_type(type_to_create)
def get_from_custom_distribution(random_value, bins, values):
return values[np.digitize(random_value, bins)]
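A sketch (invented values and probabilities) of drawing from a custom discrete distribution with this helper, where bins holds the cumulative probabilities at the interior edges:

import numpy as np

values = np.array(['a', 'b', 'c'])    # outcomes
probs = np.array([0.2, 0.5, 0.3])     # their probabilities
bins = np.cumsum(probs)[:-1]          # interior edges: [0.2, 0.7]

draw = get_from_custom_distribution(np.random.uniform(0, 1), bins, values)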
def spl_interp(xa, ya, y2a, x):
n = xa.size
# valloc=baseline_code.value_locate.value_locate(xa, x)
    # The numpy routine digitize appears to basically do what value_locate does in IDL.
    valloc = numpy.digitize(x, xa) - 1
klo = []
for i in valloc:
klo.append(min(max(i, 0), (n - 2)))
klo = numpy.array(klo)
khi = klo + 1
#
# KLO and KHI now bracket the input value of X
#
if min(xa[khi] - xa[klo]) == 0: print('SPLINT - XA inputs must be distinct')
#
# Cubic spline polynomial is now evaluated
#
h = xa[khi] - xa[klo]
a = (xa[khi] - x) / h
b = (x - xa[klo]) / h
output = a * ya[klo] + b * ya[khi] + ((a ** 3. - a) * y2a[klo] + (b ** 3. - b) * y2a[khi]) * (h ** 2.) / 6.
return output
# spl_interp.pro
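A hedged usage sketch: y2a would normally come from a companion spl_init routine that is not shown here, so below it is taken from SciPy's natural cubic spline purely to exercise the function:

import numpy
from scipy.interpolate import CubicSpline

xa = numpy.linspace(0.0, 2.0 * numpy.pi, 20)
ya = numpy.sin(xa)
cs = CubicSpline(xa, ya, bc_type='natural')
y2a = cs(xa, 2)                       # spline second derivatives at the knots

x = numpy.linspace(0.1, 6.0, 50)
y_interp = spl_interp(xa, ya, y2a, x)
# y_interp should closely match cs(x), and numpy.sin(x) away from the ends.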
def map_to_colors(buff, cmap_name):
try:
lut = cmd.color_map_luts[cmap_name]
except KeyError:
try:
# if cmap is tuple, then we're using palettable or brewer2mpl cmaps
if isinstance(cmap_name, tuple):
cmap = get_brewer_cmap(cmap_name)
else:
cmap = mcm.get_cmap(cmap_name)
cmap(0.0)
lut = cmap._lut.T
except ValueError:
raise KeyError(
"Your color map (%s) was not found in either the extracted"
" colormap file or matplotlib colormaps" % cmap_name)
if isinstance(cmap_name, tuple):
# If we are using the colorbrewer maps, don't interpolate
shape = buff.shape
# We add float_eps so that digitize doesn't go out of bounds
x = np.mgrid[0.0:1.0+np.finfo(np.float32).eps:lut[0].shape[0]*1j]
inds = np.digitize(buff.ravel(), x)
inds.shape = (shape[0], shape[1])
mapped = np.dstack([(v[inds]*255).astype('uint8') for v in lut])
del inds
else:
x = np.mgrid[0.0:1.0:lut[0].shape[0]*1j]
mapped = np.dstack(
[(np.interp(buff, x, v)*255).astype('uint8') for v in lut ])
return mapped.copy("C")
def _calculate_file_offset_map(self):
# After the FOF is performed, a load-balancing step redistributes halos
# and then writes more fields. Here, for each file, we create a list of
# files which contain the rest of the redistributed particles.
ifof = np.array([data_file.total_particles["Group"]
for data_file in self.data_files])
isub = np.array([data_file.total_offset
for data_file in self.data_files])
subend = isub.cumsum()
fofend = ifof.cumsum()
istart = np.digitize(fofend - ifof, subend - isub) - 1
iend = np.clip(np.digitize(fofend, subend), 0, ifof.size - 2)
for i, data_file in enumerate(self.data_files):
data_file.offset_files = self.data_files[istart[i]: iend[i] + 1]
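A standalone toy illustration (invented counts, no yt objects) of the cumulative-sum and digitize logic used above to find which redistributed files overlap each original file's block of halos:

import numpy as np

ifof = np.array([10, 20, 15])   # FOF halos written per original file
isub = np.array([12, 18, 15])   # halos held per file after redistribution
subend = isub.cumsum()          # [12, 30, 45]
fofend = ifof.cumsum()          # [10, 30, 45]

istart = np.digitize(fofend - ifof, subend - isub) - 1   # [0, 0, 2]
iend = np.clip(np.digitize(fofend, subend), 0, ifof.size - 2)
# File 0's halos (0-9) live entirely in redistributed file 0 (istart[0]=0, iend[0]=0);
# file 1's halos (10-29) span redistributed files 0 and 1 (istart[1]=0, iend[1]=1).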