def discretize(self, ts, bins=None, global_min=None, global_max=None):
    if bins is None:
        bins = self._bins
    if np.isscalar(bins):
        num_bins = bins
        min_value = ts.min()
        max_value = ts.max()
        if min_value == max_value:
            min_value = global_min
            max_value = global_max
        step = (max_value - min_value) / num_bins
        ts_bins = np.arange(min_value, max_value, step)
    else:
        ts_bins = bins
    inds = np.digitize(ts, ts_bins)
    binned_ts = tuple(str(i - 1) for i in inds)
    return binned_ts
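For context, a minimal standalone sketch of the same equal-width binning with plain NumPy (the function name and sample values below are illustrative, not part of the original class):

import numpy as np

# Hypothetical standalone version of the method above: cut a series into
# equal-width bins and return each sample's bin label as a string.
def discretize_series(ts, num_bins=4):
    ts = np.asarray(ts, dtype=float)
    step = (ts.max() - ts.min()) / num_bins          # assumes ts is not constant
    edges = np.arange(ts.min(), ts.max(), step)
    return tuple(str(i - 1) for i in np.digitize(ts, edges))

print(discretize_series([0.0, 3.0, 7.0, 5.0]))  # ('0', '1', '3', '2')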
Python usage examples of numpy.digitize()
def makedists(pdata, binl):
    ##### This is called from within makeraindist.
    ##### Calculate distributions
    pds = pdata.shape; nlat = pds[1]; nlon = pds[0]; nd = pds[2]
    bins = np.append(0, binl)
    n = np.empty((nlon, nlat, len(binl)))
    binno = np.empty(pdata.shape)
    for ilon in range(nlon):
        for ilat in range(nlat):
            # this is the histogram - we'll get frequency from this
            thisn, thisbin = np.histogram(pdata[ilon, ilat, :], bins)
            n[ilon, ilat, :] = thisn
            # these are the bin locations. we'll use these for the amount dist
            binno[ilon, ilat, :] = np.digitize(pdata[ilon, ilat, :], bins)
    #### Calculate the number of days with non-missing data, for normalization
    ndmat = np.tile(np.expand_dims(np.nansum(n, axis=2), axis=2), (1, 1, len(bins) - 1))
    thisppdfmap = n / ndmat
    #### Iterate back over the bins and add up all the precip - this will be the rain amount distribution
    testpamtmap = np.empty(thisppdfmap.shape)
    for ibin in range(len(bins) - 1):
        testpamtmap[:, :, ibin] = (pdata * (ibin == binno)).sum(axis=2)
    thispamtmap = testpamtmap / ndmat
    return thisppdfmap, thispamtmap
def set_responsibilities(anchor_frames, iou_thresh=0.6):
    """
    Changes the IOU values for the anchor frames to binary values

    anchor_frames: list of frames where each frame contains all features for a specific anchor
    iou_thresh: threshold to decide which anchor is responsible
    """
    # set box with maximum IOU to 1
    anchor_frames = [frame.copy() for frame in anchor_frames]
    # find maximum IOU value over all frames
    helper_array = np.array([frame[frame.columns[0]] for frame in anchor_frames]).T
    max_indices = np.argmax(helper_array, axis=1)
    data_idx = np.arange(len(max_indices))
    for obj_idx, frame_idx in zip(data_idx, max_indices):
        temp_frame = anchor_frames[frame_idx]
        temp_frame.loc[obj_idx, temp_frame.columns[0]] = 1
    # applying the iou threshold on a copy of the dataframes
    for frame in anchor_frames:
        frame[frame.columns[0]] = np.digitize(frame[frame.columns[0]], [iou_thresh])
    return anchor_frames
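The thresholding step above is just np.digitize against a single cut point; a tiny sketch with made-up IOU values:

import numpy as np

# Values at or above the threshold map to 1, everything below to 0.
ious = np.array([0.12, 0.75, 0.60, 0.31])
print(np.digitize(ious, [0.6]))  # [0 1 1 0]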
def _init_classes(self, y):
    """Map all possible classes to the range [0,..,C-1]

    Parameters
    ----------
    y : list of arrays of int, each element has shape=[samples_i,]
        Labels of the samples for each subject

    Returns
    -------
    new_y : list of arrays of int, each element has shape=[samples_i,]
        Mapped labels of the samples for each subject

    Note
    ----
    The mapping of the classes is saved in the attribute classes_.
    """
    self.classes_ = unique_labels(utils.concatenate_not_none(y))
    new_y = [None] * len(y)
    for s in range(len(y)):
        new_y[s] = np.digitize(y[s], self.classes_) - 1
    return new_y
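Since classes_ is sorted, digitizing against it maps arbitrary label values onto 0..C-1; a small sketch of the idea without the sklearn/utils helpers:

import numpy as np

classes_ = np.unique([7, 3, 7, 11])     # sorted unique labels: [ 3  7 11]
y = np.array([3, 11, 7, 7])
print(np.digitize(y, classes_) - 1)     # [0 2 1 1]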
def calc_information_sampling(data, bins, pys1, pxs, label, b, b1, len_unique_a, p_YgX, unique_inverse_x,
                              unique_inverse_y, calc_DKL=False):
    bins = bins.astype(np.float32)
    num_of_bins = bins.shape[0]
    # bins = stats.mstats.mquantiles(np.squeeze(data.reshape(1, -1)), np.linspace(0, 1, num=num_of_bins))
    # hist, bin_edges = np.histogram(np.squeeze(data.reshape(1, -1)), normed=True)
    digitized = bins[np.digitize(np.squeeze(data.reshape(1, -1)), bins) - 1].reshape(len(data), -1)
    b2 = np.ascontiguousarray(digitized).view(
        np.dtype((np.void, digitized.dtype.itemsize * digitized.shape[1])))
    unique_array, unique_inverse_t, unique_counts = \
        np.unique(b2, return_index=False, return_inverse=True, return_counts=True)
    p_ts = unique_counts / float(sum(unique_counts))
    PXs, PYs = np.asarray(pxs).T, np.asarray(pys1).T
    if calc_DKL:
        pxy_given_T = np.array(
            [calc_probs(i, unique_inverse_t, label, b, b1, len_unique_a) for i in range(0, len(unique_array))]
        )
        p_XgT = np.vstack(pxy_given_T[:, 0])
        p_YgT = pxy_given_T[:, 1]
        p_YgT = np.vstack(p_YgT).T
        DKL_YgX_YgT = np.sum([inf_ut.KL(c_p_YgX, p_YgT.T) for c_p_YgX in p_YgX.T], axis=0)
        H_Xgt = np.nansum(p_XgT * np.log2(p_XgT), axis=1)
    local_IXT, local_ITY = calc_information_from_mat(PXs, PYs, p_ts, digitized, unique_inverse_x, unique_inverse_y,
                                                     unique_array)
    return local_IXT, local_ITY
def _kl_hr(pha, amp, nbins, optimize):
    """Binarize the amplitude according to phase values.

    This function is shared by the Kullback-Leibler Distance and the
    Height Ratio.
    """
    vecbin = np.linspace(-np.pi, np.pi, nbins + 1)
    phad = np.digitize(pha, vecbin) - 1

    abin = []
    for i in np.unique(phad):
        # Find where the phase takes vecbin values :
        idx = phad == i
        # Take the sum of amplitude inside the bin :
        abin_pha = np.einsum('i...j, k...j->ik...', amp, idx,
                             optimize=optimize)
        abin.append(abin_pha)

    return np.array(abin)
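A 1-D sketch of the same binning (the real function uses einsum so that amp may carry extra dimensions); the array sizes below are illustrative:

import numpy as np

nbins = 18
pha = np.random.uniform(-np.pi, np.pi, 1000)   # phases in radians
amp = np.random.rand(1000)                     # matching amplitudes
vecbin = np.linspace(-np.pi, np.pi, nbins + 1)
phad = np.digitize(pha, vecbin) - 1
# Sum the amplitude samples falling inside each phase bin.
abin = np.array([amp[phad == i].sum() for i in range(nbins)])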
def _compute_ratemap(self, min_duration=None):
    if min_duration is None:
        min_duration = self._min_duration

    ext = self.trans_func(self._extern, at=self._bst.bin_centers)
    ext_bin_idx = np.digitize(ext, self.bins, True)

    # make sure that all the events fit between extmin and extmax:
    # TODO: this might rather be a warning, but it's a pretty serious warning...
    if ext_bin_idx.max() > self.n_bins:
        raise ValueError("ext values greater than 'ext_max'")
    if ext_bin_idx.min() == 0:
        raise ValueError("ext values less than 'ext_min'")

    ratemap = np.zeros((self.n_units, self.n_bins))

    for tt, bidx in enumerate(ext_bin_idx):
        ratemap[:, bidx - 1] += self._bst.data[:, tt]

    # apply minimum observation duration
    for uu in range(self.n_units):
        ratemap[uu][self.occupancy * self._bst.ds < min_duration] = 0

    return ratemap / self._bst.ds
def __call__(self, data_object):
    orig_shape = data_object[self.x_name].shape
    x_vals = data_object[self.x_name].ravel().astype('float64')
    y_vals = data_object[self.y_name].ravel().astype('float64')

    x_i = (np.digitize(x_vals, self.x_bins) - 1).astype('int32')
    y_i = (np.digitize(y_vals, self.y_bins) - 1).astype('int32')
    if np.any((x_i == -1) | (x_i == len(self.x_bins) - 1)) \
            or np.any((y_i == -1) | (y_i == len(self.y_bins) - 1)):
        if not self.truncate:
            mylog.error("Sorry, but your values are outside" + \
                        " the table! Dunno what to do, so dying.")
            mylog.error("Error was in: %s", data_object)
            raise ValueError
        else:
            x_i = np.minimum(np.maximum(x_i, 0), len(self.x_bins) - 2)
            y_i = np.minimum(np.maximum(y_i, 0), len(self.y_bins) - 2)

    my_vals = np.zeros(x_vals.shape, dtype='float64')
    lib.BilinearlyInterpolate(self.table,
                              x_vals, y_vals, self.x_bins, self.y_bins,
                              x_i, y_i, my_vals)
    my_vals.shape = orig_shape
    return my_vals
def interpolation_alphas(self, points, *args, **kwargs):
    '''
    Returns a pair of values. The 1st value is an array of the depth indices of all the particles.
    The 2nd value is an array of the interpolation alphas for the particles between their depth
    index and depth_index+1. If both values are None, then all particles are on the surface layer.
    '''
    points = np.asarray(points, dtype=np.float64)
    points = points.reshape(-1, 3)
    underwater = points[:, 2] > 0
    if len(np.where(underwater)[0]) == 0:
        return None, None
    indices = -np.ones((len(points)), dtype=np.int64)
    alphas = -np.ones((len(points)), dtype=np.float64)
    pts = points[underwater]
    und_ind = -np.ones((len(np.where(underwater)[0])))
    und_alph = und_ind.copy()

    und_ind = np.digitize(pts[:, 2], self.depth_levels) - 1
    for i, n in enumerate(und_ind):
        if n == len(self.depth_levels) - 1:
            und_ind[i] = -1
        if und_ind[i] != -1:
            und_alph[i] = (pts[i, 2] - self.depth_levels[und_ind[i]]) / (self.depth_levels[und_ind[i] + 1] - self.depth_levels[und_ind[i]])
    indices[underwater] = und_ind
    alphas[underwater] = und_alph
    return indices, alphas
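The per-particle loop reduces to a digitize plus a fractional offset; a sketch with hypothetical depth levels, assuming every depth lies strictly inside the levels:

import numpy as np

depth_levels = np.array([0.0, 10.0, 25.0, 50.0])
depths = np.array([4.0, 30.0])
idx = np.digitize(depths, depth_levels) - 1          # layer index below each particle
alphas = (depths - depth_levels[idx]) / (depth_levels[idx + 1] - depth_levels[idx])
print(idx, alphas)                                   # [0 2] [0.4 0.2]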
test_random_forest_classifier_numeric.py (project: coremltools, author: apple)
def setUpClass(self):
    from sklearn.datasets import load_boston
    from sklearn.tree import DecisionTreeClassifier

    # Load data and train model
    import numpy as np

    scikit_data = load_boston()
    self.X = scikit_data.data.astype('f').astype('d')  ## scikit-learn downcasts data
    t = scikit_data.target
    num_classes = 3
    target = np.digitize(t, np.histogram(t, bins=num_classes - 1)[1]) - 1

    # Save the data and the model
    self.scikit_data = scikit_data
    self.target = target
    self.feature_names = scikit_data.feature_names
    self.output_name = 'target'
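The pattern shared by these coremltools tests is to discretize the continuous Boston target into class labels using histogram edges; a sketch on synthetic data (load_boston has since been removed from recent scikit-learn releases):

import numpy as np

t = np.random.uniform(5.0, 50.0, size=200)    # stand-in for scikit_data.target
num_classes = 3
edges = np.histogram(t, bins=num_classes - 1)[1]
target = np.digitize(t, edges) - 1            # labels in {0, 1, 2}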
def setUpClass(self):
    """
    Set up the unit test by loading the dataset and training a model.
    """
    from sklearn.datasets import load_boston
    from sklearn.ensemble import RandomForestClassifier
    import numpy as np

    scikit_data = load_boston()
    scikit_model = RandomForestClassifier(random_state=1)
    t = scikit_data.target
    target = np.digitize(t, np.histogram(t)[1]) - 1

    scikit_model.fit(scikit_data.data, target)

    # Save the data and the model
    self.scikit_data = scikit_data
    self.target = target
    self.scikit_model = scikit_model
test_boosted_trees_classifier_numeric.py (project: coremltools, author: apple)
def setUpClass(self):
    from sklearn.datasets import load_boston

    # Load data and train model
    import numpy as np

    scikit_data = load_boston()
    num_classes = 3
    self.X = scikit_data.data.astype('f').astype('d')  ## scikit-learn downcasts data
    t = scikit_data.target
    target = np.digitize(t, np.histogram(t, bins=num_classes - 1)[1]) - 1

    # Save the data and the model
    self.scikit_data = scikit_data
    self.target = target
    self.feature_names = scikit_data.feature_names
    self.output_name = 'target'
def setUpClass(self):
    """
    Set up the unit test by loading the dataset and training a model.
    """
    from sklearn.datasets import load_boston
    from sklearn.tree import DecisionTreeClassifier
    from sklearn.preprocessing import MultiLabelBinarizer
    import numpy as np

    scikit_data = load_boston()
    scikit_model = DecisionTreeClassifier(random_state=1)
    t = scikit_data.target
    target = np.digitize(t, np.histogram(t)[1]) - 1
    scikit_model.fit(scikit_data.data, target)

    # Save the data and the model
    self.scikit_data = scikit_data
    self.target = target
    self.scikit_model = scikit_model
def setUpClass(self):
    """
    Set up the unit test by loading the dataset and training a model.
    """
    from sklearn.datasets import load_boston
    from sklearn.ensemble import GradientBoostingClassifier
    import numpy as np

    scikit_data = load_boston()
    scikit_model = GradientBoostingClassifier(random_state=1)
    t = scikit_data.target
    target = np.digitize(t, np.histogram(t)[1]) - 1
    scikit_model.fit(scikit_data.data, target)
    self.target = target

    # Save the data and the model
    self.scikit_data = scikit_data
    self.scikit_model = scikit_model
def auto_classify_transmitters(detections):
    """Identify transmitter IDs based on carrier frequency."""
    # Split by receiver
    detections_by_rx = defaultdict(list)
    for detection in detections:
        detections_by_rx[detection.rxid].append(detection)

    edges = {}
    for rxid, rx_detections in detections_by_rx.items():
        freqs = np.array([d.carrier_info.bin for d in rx_detections])
        rx_edges = detect_transmitter_windows(freqs)
        summary = ("Detected {} transmitter(s) at RX {}:"
                   .format(len(rx_edges) - 1, rxid))
        for i in range(len(rx_edges) - 1):
            summary += " {}-{}".format(rx_edges[i], rx_edges[i + 1] - 1)
        print(summary)
        edges[rxid] = rx_edges[:-1]

    txids = [np.digitize(d.carrier_info.bin, edges[d.rxid]) - 1
             for d in detections]
    return txids
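The final assignment step is a digitize of each detection's carrier bin against its receiver's window edges; with made-up edges:

import numpy as np

rx_edges = np.array([0, 120, 260])              # hypothetical window start bins
carrier_bins = np.array([15, 130, 300, 60])
print(np.digitize(carrier_bins, rx_edges) - 1)  # [0 1 2 0]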
def assign_dope_items(self, selection):
    # Builds a list of all DOPE values of the residues in the selection.
    ldope = []
    for chain_element in selection:
        ldope.extend(chain_element.dope_scores)
    # Takes the min and max values among all the selected residues.
    min_value = min(ldope)
    max_value = max(ldope)
    # An array with the equally spaced limits generated with the list above.
    bins = numpy.array(numpy.linspace(min_value, max_value, num=10))
    for chain_element in selection:
        # An array with all the DOPE values of a single chain in the selection.
        adope = numpy.array(chain_element.dope_scores)
        # An array with the id of the bins where those values reside.
        inds = numpy.digitize(adope, bins)
        # Builds a list like:
        # [(-0.052, 4), (-0.03, 3), (-0.04, 5), (-0.04, 6), (-0.041, 7), (-0.042, 8), (-0.043, 10), ...]
        # which contains a tuple for every standard residue of a polypeptide chain. The
        # first value of the tuple is the DOPE score of that residue, the second is the id
        # (going from 1 to 10) of the bin where that value resides.
        chain_element.dope_items = []
        for dope_score, bin_id in zip(adope, inds):  # zip(ldope, inds):
            chain_element.dope_items.append({"dope-score": dope_score, "interval": bin_id})
def __update_state(self):
    """
    Updates the state space (self.gamestate) after the suggested action is taken
    :return: None
    """
    jigsaw_id, place_id = self.decode_action()
    self.__update_placed_pieces(jigsaw_id, place_id)
    if self.state_type == 'hog':
        self.__render_gamestate()
    elif self.state_type == 'image':
        resized_discrete_im = np.digitize(
            imresize(self.jigsaw_image, (self.state_height, self.state_width)),
            self.bins)
        self.gamestate = np.array([resized_discrete_im]).transpose().swapaxes(0, 1)
    else:
        raise ValueError('The state type is not valid, enter "hog" or "image"')
def vals2colors(vals, cmap='GnBu_d', res=100):
    """Maps values to colors

    Args:
        vals (list or list of lists) - list of values to map to colors
        cmap (str) - color map (default is 'GnBu_d')
        res (int) - resolution of the color map (default: 100)

    Returns:
        list of rgb tuples
    """
    # flatten if list of lists
    if any(isinstance(el, list) for el in vals):
        vals = list(itertools.chain(*vals))

    # get palette from seaborn
    palette = np.array(sns.color_palette(cmap, res))
    ranks = np.digitize(vals, np.linspace(np.min(vals), np.max(vals) + 1, res + 1)) - 1
    return [tuple(i) for i in palette[ranks, :]]
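The rank computation above without seaborn, just to show the mapping onto res equally spaced bins over [min, max + 1):

import numpy as np

vals = [0.0, 2.5, 5.0, 9.9]
res = 10
ranks = np.digitize(vals, np.linspace(min(vals), max(vals) + 1, res + 1)) - 1
print(ranks)  # [0 2 4 9]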
def reverseHistogram(data, bins=None):
    """
    Bins data using numpy.histogram and calculates the
    reverse indices for the entries like IDL.

    Parameters:
    data : data to pass to numpy.histogram
    bins : bins to pass to numpy.histogram

    Returns:
    hist : bin content output by numpy.histogram
    edges : edges output from numpy.histogram
    rev : reverse indices of entries in each bin

    Using Reverse Indices:
        h, e, rev = reverseHistogram(data, bins=bins)
        for i in range(h.size):
            if rev[i] != rev[i+1]:
                # data points were found in this bin, get their indices
                indices = rev[ rev[i]:rev[i+1] ]
                # do calculations with data[indices] ...
    """
    if bins is None: bins = numpy.arange(data.max() + 2)
    hist, edges = numpy.histogram(data, bins=bins)
    digi = numpy.digitize(data.flat, bins=numpy.unique(data)).argsort()
    rev = numpy.hstack((len(edges), len(edges) + numpy.cumsum(hist), digi))
    return hist, edges, rev
def run_semi_online(self, sess, inputs_clean, inputs_noisy, num_samples):
    dump = sess.run(self.init_ops,
                    feed_dict={self.history_clean: inputs_clean[:, 0:self.len_pad + 1]})
    skips_noisy_sum = sess.run(self.skips_noisy_sum,
                               feed_dict={self.inputs_noisy: inputs_noisy})
    indices = inputs_clean[:, self.len_pad:self.len_pad + 1]
    predictions_ = []
    for step in xrange(num_samples):
        # indices = inputs_clean[:, self.len_pad + step:self.len_pad + 1 + step]
        feed_dict = {self.inputs_clean: indices,
                     self.skips_noisy: skips_noisy_sum[:, :, step]}
        output_dist = sess.run(self.out_ops, feed_dict=feed_dict)[0]
        # indices = np.argmax(output_dist, axis=1)[:, None]
        # inputs = self.bins_center[indices[:, 0]].astype(np.float32)
        inputs = np.matmul(output_dist, self.bins_center).astype(np.float32)
        indices = np.digitize(inputs, self.bins_edge, right=False)[:, None]
        predictions_.append(indices)
    predictions = np.concatenate(predictions_, axis=1)
    dump = sess.run(self.dequ_ops)
    return predictions
def run_semi_online_v2(sess,
                       out_ops,
                       skips_noisy_batch,
                       indices,
                       inputs_noisy,
                       num_samples):
    skips_noisy_sum = sess.run(skips_noisy_batch)
    predictions_ = []
    for step in xrange(num_samples):
        feed_dict = {self.inputs_clean: indices,
                     self.skips_noisy: skips_noisy_sum[:, :, step]}
        output_dist = sess.run([out_ops], feed_dict=feed_dict)[0]
        # output dim = 1 x 256, it is 2D but we need 1D input to argmax
        indices = random_bins(NUM_CLASSES, output_dist)
        inputs = self.bins[indices]
        # inputs = np.array(np.matmul(output_dist, self.bins), dtype=np.float32)[:, None]
        # indices = np.digitize(inputs[:, 0], self.bins, right=False)[:, None]
        predictions_.append(inputs)
def compute_unnormalized_crosscorrelogram(a, b, nb_bins=101, width=100e-3, f=0.0, **kwargs):
    """Compute the un-normalized cross-correlogram"""
    bin_width = width / float(nb_bins)
    start = -width / 2.0
    stop = +width / 2.0
    bins = np.linspace(start, stop, nb_bins + 1)
    values = np.zeros(nb_bins, dtype=int)
    for v in a:
        d = b - v - f * bin_width
        is_selected = np.abs(d) < width / 2.0
        d = d[is_selected]
        indices = np.digitize(d, bins) - 1
        values[indices] += 1
    if 't_min' in kwargs and 't_max' in kwargs:
        t_min, t_max = [kwargs[key] for key in ['t_min', 't_max']]
        if t_min is not None and t_max is not None:
            values = values.astype(float) / (t_max - t_min)
    bins = bins * 1e+3
    values = np.append(values, [values[-1]])
    return bins, values
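A usage sketch of the function above, assuming a and b are spike times in seconds:

import numpy as np

a = np.array([0.010, 0.050, 0.120])
b = np.array([0.012, 0.055, 0.118, 0.300])
bins, values = compute_unnormalized_crosscorrelogram(a, b, nb_bins=51, width=50e-3)
# bins are returned in milliseconds; values counts coincidences per lag bin
# and repeats the last entry so the pair suits a step plot.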
def roundx(x, y, binstart=0.1):
    """Round off to try and grid-up nearly gridded data
    """
    bins = np.arange(x.min(), x.max() + binstart, binstart)
    counts, bin_edges = np.histogram(x, bins=bins)

    # merge together bins that are neighboring and have counts
    new_bin_edges = []
    new_bin_edges.append(bin_edges[0])
    for i, b in enumerate(bin_edges[1:]):
        if (counts[i] > 0) & (counts[i - 1] > 0):
            pass
        else:
            new_bin_edges.append(bin_edges[i])
    if bin_edges[-1] != new_bin_edges[-1]:
        new_bin_edges.append(bin_edges[-1])
    indx = np.digitize(x, new_bin_edges)
    new_bin_edges = np.array(new_bin_edges)
    bin_centers = (new_bin_edges[1:] - new_bin_edges[:-1]) / 2. + new_bin_edges[:-1]
    new_x = bin_centers[indx - 1]
    return new_x
def evaluate_model(model, generator, steps, metric, category_cutoffs=[0.]):
    y_true, y_pred = None, None
    count = 0
    while count < steps:
        x_batch, y_batch = next(generator)
        y_batch_pred = model.predict_on_batch(x_batch)
        y_batch_pred = y_batch_pred.ravel()
        y_true = np.concatenate((y_true, y_batch)) if y_true is not None else y_batch
        y_pred = np.concatenate((y_pred, y_batch_pred)) if y_pred is not None else y_batch_pred
        count += 1

    loss = evaluate_keras_metric(y_true.astype(np.float32), y_pred.astype(np.float32), metric)

    y_true_class = np.digitize(y_true, category_cutoffs)
    y_pred_class = np.digitize(y_pred, category_cutoffs)

    # theano does not like integer input
    acc = evaluate_keras_metric(y_true_class.astype(np.float32), y_pred_class.astype(np.float32), 'binary_accuracy')  # works for multiclass labels as well

    return loss, acc, y_true, y_pred, y_true_class, y_pred_class
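The class conversion used above in isolation: one cutoff gives binary labels, several cutoffs give ordinal classes (values below are illustrative):

import numpy as np

y_pred = np.array([-1.2, 0.3, 0.0, 2.1])
print(np.digitize(y_pred, [0.]))           # [0 1 1 1]
print(np.digitize(y_pred, [-1., 0., 1.]))  # [0 2 2 3]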
def put_in_buckets(data_array, labels, buckets, mode='pad'):
    """
    Given bucket edges and data, put the data in buckets according to their length
    :param data_array:
    :param labels:
    :param buckets:
    :return:
    """
    input_lengths = np.array([len(s) for s in data_array], dtype='int')
    input_bucket_index = np.array([i if i < len(buckets) else len(buckets) - 1
                                   for i in np.digitize(input_lengths, buckets, right=False)])  # during testing, longer sentences are just truncated
    if mode == 'truncate':
        input_bucket_index -= 1
    bucketed_data = {}
    reordering_indexes = {}
    for bucket in list(np.unique(input_bucket_index)):
        length_indexes = np.where(input_bucket_index == bucket)[0]
        reordering_indexes[bucket] = length_indexes
        maxlen = int(np.floor(buckets[bucket]))
        padded = pad_data(data_array[length_indexes], labels[length_indexes], max_len=maxlen)
        bucketed_data[bucket] = padded  # in final dict, start counting by zero
    return bucketed_data, reordering_indexes
def transform(self, X, y=None):
    """Binarize X based on the fitted cut points."""

    # scikit-learn checks
    X = check_array(X)

    if self.cut_points is None:
        raise NotFittedError('Estimator not fitted, call `fit` before exploiting the model.')

    if X.shape[1] != len(self.cut_points):
        raise ValueError("Provided array's dimensions do not match with the ones from the "
                         "array `fit` was called on.")

    binned = np.array([
        np.digitize(x, self.cut_points[i])
        if len(self.cut_points[i]) > 0
        else np.zeros(x.shape)
        for i, x in enumerate(X.T)
    ]).T

    return binned
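The core of the transform for a single feature column, with hypothetical cut points standing in for the ones learned by fit:

import numpy as np

cut_points = [0.5, 1.5]                 # assumed fitted cut points
x = np.array([0.1, 0.7, 2.0, 1.5])
print(np.digitize(x, cut_points))       # [0 1 2 2]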
def get_bg_mats(fragsx, fragsy, sv_region, window_size):
    bg_mats = {}
    selectors = {"+": "end_pos", "-": "start_pos"}

    binsx = numpy.arange(sv_region["startx"], sv_region["endx"] + window_size, window_size)
    binsy = numpy.arange(sv_region["starty"], sv_region["endy"] + window_size, window_size)

    for orientationx in "+-":
        binx = numpy.digitize(fragsx[selectors[orientationx]], binsx) - 1
        gx = fragsx.groupby(binx)
        bcsx = [set(gx.get_group(k)["bc"]) if k in gx.groups else set() for k in range(len(binsx))]

        for orientationy in "+-":
            biny = numpy.digitize(fragsy[selectors[orientationy]], binsy) - 1
            gy = fragsy.groupby(biny)
            bcsy = [set(gy.get_group(k)["bc"]) if k in gy.groups else set() for k in range(len(binsy))]

            bg_mats[orientationx + orientationy] = get_bg_mat(bcsx, bcsy)

    return bg_mats
def _digitize(x, bins, right=False):
    """Replacement for digitize with right kwarg (numpy < 1.7).

    Notes
    -----
    This fix is only meant for integer arrays. If ``right==True`` but either
    ``x`` or ``bins`` are of a different type, a NotImplementedError will be
    raised.
    """
    if right:
        x = np.asarray(x)
        bins = np.asarray(bins)
        if (x.dtype.kind not in 'ui') or (bins.dtype.kind not in 'ui'):
            raise NotImplementedError("Only implemented for integer input")
        return np.digitize(x - 1e-5, bins)
    else:
        return np.digitize(x, bins)
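A quick check of the behaviour the shim emulates for integer input: with right=True a sample equal to a bin edge falls into the lower bin.

import numpy as np

x = np.array([1, 5, 10])
bins = np.array([0, 5, 10])
print(np.digitize(x, bins))          # right=False: [1 2 3]
print(np.digitize(x - 1e-5, bins))   # the shim's right=True path: [1 1 2]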