def sample_weights(self, idxs, scores):
N = len(scores)
S1 = scores[np.setdiff1d(np.arange(N), idxs)].sum()
return np.tile([float(N), float(S1)], (len(idxs), 1))
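A minimal, self-contained sketch (toy inputs, not from the original source) of what the call above computes: np.setdiff1d selects the indices that were not sampled, their scores are summed, and that sum is tiled next to the population size N for every sampled index.

import numpy as np

scores = np.array([0.1, 0.4, 0.2, 0.3])   # hypothetical per-sample scores
idxs = np.array([1, 3])                    # hypothetical sampled indices

N = len(scores)
unsampled = np.setdiff1d(np.arange(N), idxs)    # indices not in idxs -> [0, 2]
S1 = scores[unsampled].sum()                    # 0.1 + 0.2 = 0.3
weights = np.tile([float(N), float(S1)], (len(idxs), 1))
print(weights)   # [[4.  0.3]
                 #  [4.  0.3]]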
def _generate_pairs(ids):
    id_i, id_j = np.meshgrid(ids, ids, indexing='ij')  # all ordered (i, j) combinations of the input rois
id_i = id_i.reshape(-1)
id_j = id_j.reshape(-1)
# remove the diagonal items
id_num = len(ids)
diagonal_items = np.array(range(id_num))
diagonal_items = diagonal_items * id_num + diagonal_items
all_id = range(len(id_i))
selected_id = np.setdiff1d(all_id, diagonal_items)
id_i = id_i[selected_id]
id_j = id_j[selected_id]
return id_i, id_j
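A quick hypothetical check of the pairing logic above: meshgrid enumerates every ordered (i, j) combination, and np.setdiff1d drops the flattened diagonal positions so no roi is paired with itself.

import numpy as np

ids = np.array([10, 20, 30])          # toy roi identifiers
id_i, id_j = _generate_pairs(ids)
print(np.stack([id_i, id_j], axis=1))
# [[10 20]
#  [10 30]
#  [20 10]
#  [20 30]
#  [30 10]
#  [30 20]]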
def ttv(self, v, modes=[], without=False):
"""
Tensor times vector product
Parameters
----------
v : 1-d array or tuple of 1-d arrays
Vector to be multiplied with tensor.
modes : array_like of integers, optional
Modes in which the vectors should be multiplied.
without : boolean, optional
If True, vectors are multiplied in all modes **except** the
modes specified in ``modes``.
"""
if not isinstance(v, tuple):
v = (v, )
dims, vidx = check_multiplication_dims(modes, self.ndim, len(v), vidx=True, without=without)
for i in range(len(dims)):
if not len(v[vidx[i]]) == self.shape[dims[i]]:
raise ValueError('Multiplicant is wrong size')
remdims = np.setdiff1d(range(self.ndim), dims)
return self._ttv_compute(v, dims, vidx, remdims)
#@abstractmethod
#def ttt(self, other, modes=None):
# pass
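A rough numpy-only sketch of the idea behind ttv, assuming a plain ndarray instead of the library's tensor class: np.setdiff1d picks the modes that remain after the vectors have been contracted into the specified modes.

import numpy as np

def ttv_sketch(T, vectors, modes):
    """Contract each vector into its mode of T; the other modes survive."""
    remdims = np.setdiff1d(range(T.ndim), modes)        # modes left untouched
    # contract the highest mode first so earlier axis numbers stay valid
    for mode, v in sorted(zip(modes, vectors), reverse=True):
        T = np.tensordot(T, v, axes=(mode, 0))
    return T, remdims

T = np.arange(24.0).reshape(2, 3, 4)
out, remdims = ttv_sketch(T, [np.ones(3), np.ones(4)], modes=[1, 2])
print(out.shape, remdims)   # (2,) [0]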
def setdiff1d(ar1, ar2, assume_unique=False):
"""
Find the set difference of two arrays.
Return the sorted, unique values in `ar1` that are not in `ar2`.
Parameters
----------
ar1 : array_like
Input array.
ar2 : array_like
Input comparison array.
assume_unique : bool
If True, the input arrays are both assumed to be unique, which
can speed up the calculation. Default is False.
Returns
-------
setdiff1d : ndarray
Sorted 1D array of values in `ar1` that are not in `ar2`.
See Also
--------
numpy.lib.arraysetops : Module with a number of other functions for
performing set operations on arrays.
Examples
--------
>>> a = np.array([1, 2, 3, 2, 4, 1])
>>> b = np.array([3, 4, 5, 6])
>>> np.setdiff1d(a, b)
array([1, 2])
"""
if assume_unique:
ar1 = np.asarray(ar1).ravel()
else:
ar1 = unique(ar1)
ar2 = unique(ar2)
return ar1[in1d(ar1, ar2, assume_unique=True, invert=True)]
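One caveat worth adding to the docstring above: with assume_unique=True the fast path skips deduplication and sorting, so passing non-unique inputs returns duplicates in their original order.

>>> a = np.array([1, 2, 3, 2, 4, 1])
>>> b = np.array([3, 4, 5, 6])
>>> np.setdiff1d(a, b, assume_unique=True)   # inputs are not actually unique
array([1, 2, 2, 1])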
def test_boolean_spheres_overlap():
r"""Test to make sure that boolean objects (spheres, overlap)
behave the way we expect.
Test overlapping spheres.
"""
ds = fake_amr_ds()
sp1 = ds.sphere([0.45, 0.45, 0.45], 0.15)
sp2 = ds.sphere([0.55, 0.55, 0.55], 0.15)
# Get indices of both.
i1 = sp1["index","morton_index"]
i2 = sp2["index","morton_index"]
# Make some booleans
bo1 = sp1 & sp2
bo2 = sp1 - sp2
bo3 = sp1 | sp2
bo4 = ds.union([sp1, sp2])
bo5 = ds.intersection([sp1, sp2])
# Now make sure the indices also behave as we expect.
lens = np.intersect1d(i1, i2)
apple = np.setdiff1d(i1, i2)
both = np.union1d(i1, i2)
b1 = bo1["index","morton_index"]
b1.sort()
b2 = bo2["index","morton_index"]
b2.sort()
b3 = bo3["index","morton_index"]
b3.sort()
assert_array_equal(b1, lens)
assert_array_equal(b2, apple)
assert_array_equal(b3, both)
b4 = bo4["index","morton_index"]
b4.sort()
b5 = bo5["index","morton_index"]
b5.sort()
assert_array_equal(b3, b4)
assert_array_equal(b1, b5)
bo6 = sp1 ^ sp2
b6 = bo6["index", "morton_index"]
b6.sort()
assert_array_equal(b6, np.setxor1d(i1, i2))
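The four identities this test relies on can be sanity-checked on bare index arrays; a tiny standalone sketch with hypothetical indices (not tied to yt):

import numpy as np

i1 = np.array([0, 1, 2, 3, 4])   # indices selected by the first object
i2 = np.array([3, 4, 5, 6])      # indices selected by the second object

assert np.array_equal(np.intersect1d(i1, i2), [3, 4])             # "&": in both
assert np.array_equal(np.setdiff1d(i1, i2), [0, 1, 2])            # "-": first minus second
assert np.array_equal(np.union1d(i1, i2), [0, 1, 2, 3, 4, 5, 6])  # "|": in either
assert np.array_equal(np.setxor1d(i1, i2), [0, 1, 2, 5, 6])       # "^": in exactly one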
def test_boolean_regions_overlap():
r"""Test to make sure that boolean objects (regions, overlap)
behave the way we expect.
Test overlapping regions.
"""
ds = fake_amr_ds()
re1 = ds.region([0.55]*3, [0.5]*3, [0.6]*3)
re2 = ds.region([0.6]*3, [0.55]*3, [0.65]*3)
# Get indices of both.
i1 = re1["index","morton_index"]
i2 = re2["index","morton_index"]
# Make some booleans
bo1 = re1 & re2
bo2 = re1 - re2
bo3 = re1 | re2
bo4 = ds.union([re1, re2])
bo5 = ds.intersection([re1, re2])
# Now make sure the indices also behave as we expect.
cube = np.intersect1d(i1, i2)
bite_cube = np.setdiff1d(i1, i2)
both = np.union1d(i1, i2)
b1 = bo1["index","morton_index"]
b1.sort()
b2 = bo2["index","morton_index"]
b2.sort()
b3 = bo3["index","morton_index"]
b3.sort()
assert_array_equal(b1, cube)
assert_array_equal(b2, bite_cube)
assert_array_equal(b3, both)
b4 = bo4["index","morton_index"]
b4.sort()
b5 = bo5["index","morton_index"]
b5.sort()
assert_array_equal(b3, b4)
assert_array_equal(b1, b5)
bo6 = re1 ^ re2
b6 = bo6["index", "morton_index"]
b6.sort()
assert_array_equal(b6, np.setxor1d(i1, i2))
def test_boolean_ellipsoids_overlap():
r"""Test to make sure that boolean objects (ellipsoids, overlap)
behave the way we expect.
Test overlapping ellipsoids.
"""
ds = fake_amr_ds()
ell1 = ds.ellipsoid([0.45]*3, 0.05, 0.05, 0.05, np.array([0.1]*3), 0.1)
ell2 = ds.ellipsoid([0.55]*3, 0.05, 0.05, 0.05, np.array([0.1]*3), 0.1)
# Get indices of both.
i1 = ell1["index","morton_index"]
i2 = ell2["index","morton_index"]
# Make some booleans
bo1 = ell1 & ell2
bo2 = ell1 - ell2
bo3 = ell1 | ell2
bo4 = ds.union([ell1, ell2])
bo5 = ds.intersection([ell1, ell2])
# Now make sure the indices also behave as we expect.
overlap = np.intersect1d(i1, i2)
diff = np.setdiff1d(i1, i2)
both = np.union1d(i1, i2)
b1 = bo1["index","morton_index"]
b1.sort()
b2 = bo2["index","morton_index"]
b2.sort()
b3 = bo3["index","morton_index"]
b3.sort()
assert_array_equal(b1, overlap)
assert_array_equal(b2, diff)
assert_array_equal(b3, both)
b4 = bo4["index","morton_index"]
b4.sort()
b5 = bo5["index","morton_index"]
b5.sort()
assert_array_equal(b3, b4)
assert_array_equal(b1, b5)
bo6 = ell1 ^ ell2
b6 = bo6["index", "morton_index"]
b6.sort()
assert_array_equal(b6, np.setxor1d(i1, i2))
def test_boolean_slices_overlap():
r"""Test to make sure that boolean objects (slices, overlap)
behave the way we expect.
Test overlapping slices.
"""
ds = fake_amr_ds()
sl1 = ds.r[:,:,0.25]
sl2 = ds.r[:,0.75,:]
# Get indices of both.
i1 = sl1["index","morton_index"]
i2 = sl2["index","morton_index"]
# Make some booleans
bo1 = sl1 & sl2
bo2 = sl1 - sl2
bo3 = sl1 | sl2
bo4 = ds.union([sl1, sl2])
bo5 = ds.intersection([sl1, sl2])
# Now make sure the indices also behave as we expect.
line = np.intersect1d(i1, i2)
orig = np.setdiff1d(i1, i2)
both = np.union1d(i1, i2)
b1 = bo1["index","morton_index"]
b1.sort()
b2 = bo2["index","morton_index"]
b2.sort()
b3 = bo3["index","morton_index"]
b3.sort()
assert_array_equal(b1, line)
assert_array_equal(b2, orig)
assert_array_equal(b3, both)
b4 = bo4["index","morton_index"]
b4.sort()
b5 = bo5["index","morton_index"]
b5.sort()
assert_array_equal(b3, b4)
assert_array_equal(b1, b5)
bo6 = sl1 ^ sl2
b6 = bo6["index", "morton_index"]
b6.sort()
assert_array_equal(b6, np.setxor1d(i1, i2))
def analyze_false(validData,validDataNumbers,validLabels,model):
    """Calculate precision and recall statistics for the best model."""
predictions = np.squeeze((model.predict(validDataNumbers) > 0.5).astype('int32'))
c1_inds = np.where(validLabels == 1)[0]
    pos_inds = np.where((predictions+validLabels) == 2)[0]  # true positives: prediction and label both 1
neg_inds = np.setdiff1d(c1_inds,pos_inds)
seq_lengths = np.zeros((validData.shape[0]))
for ind,row in np.ndenumerate(validData):
seq_lengths[ind] = len(wordpunct_tokenize(row.lower().strip()))
mean_true_length = np.mean(seq_lengths[pos_inds])
mean_false_length = np.mean(seq_lengths[neg_inds])
return mean_false_length,mean_true_length
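A hypothetical miniature of the index bookkeeping above: pos_inds holds the true positives (prediction and label both 1), and np.setdiff1d leaves the false negatives within the positive class.

import numpy as np

validLabels = np.array([1, 0, 1, 1, 0])
predictions = np.array([1, 0, 0, 1, 1])

c1_inds = np.where(validLabels == 1)[0]                    # [0 2 3] positive class
pos_inds = np.where((predictions + validLabels) == 2)[0]   # [0 3]   true positives
neg_inds = np.setdiff1d(c1_inds, pos_inds)                 # [2]     false negatives
print(pos_inds, neg_inds)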
def _get_epochs_interpolation(self, epochs, drop_log,
ch_type, verbose='progressbar'):
"""Interpolate the bad epochs."""
# 1: bad segment, # 2: interpolated
fix_log = drop_log.copy()
ch_names = epochs.ch_names
non_picks = np.setdiff1d(range(epochs.info['nchan']), self.picks)
interp_channels = list()
n_interpolate = self.n_interpolate[ch_type]
for epoch_idx in range(len(epochs)):
n_bads = drop_log[epoch_idx, self.picks].sum()
if n_bads == 0:
continue
else:
if n_bads <= n_interpolate:
interp_chs_mask = drop_log[epoch_idx] == 1
else:
# get peak-to-peak for channels in that epoch
data = epochs[epoch_idx].get_data()[0]
peaks = np.ptp(data, axis=-1)
peaks[non_picks] = -np.inf
# find channels which are bad by rejection threshold
interp_chs_mask = drop_log[epoch_idx] == 1
# ignore good channels
peaks[~interp_chs_mask] = -np.inf
# find the ordering of channels amongst the bad channels
sorted_ch_idx_picks = np.argsort(peaks)[::-1]
# then select only the worst n_interpolate channels
interp_chs_mask[
sorted_ch_idx_picks[n_interpolate:]] = False
fix_log[epoch_idx][interp_chs_mask] = 2
interp_chs = np.where(interp_chs_mask)[0]
interp_chs = [ch_name for idx, ch_name in enumerate(ch_names)
if idx in interp_chs]
interp_channels.append(interp_chs)
return interp_channels, fix_log
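The channel-selection step in the else-branch is the subtle part; a small standalone sketch with made-up peak-to-peak values shows how only the worst n_interpolate bad channels stay marked.

import numpy as np

n_interpolate = 2
bad_mask = np.array([True, True, False, True, False])   # channels flagged bad in this epoch
peaks = np.array([3.0, 9.0, 1.0, 5.0, 2.0])             # hypothetical peak-to-peak amplitudes

peaks[~bad_mask] = -np.inf                    # ignore good channels
order = np.argsort(peaks)[::-1]               # worst (largest ptp) first
interp_mask = bad_mask.copy()
interp_mask[order[n_interpolate:]] = False    # unmark everything but the worst n_interpolate
print(np.where(interp_mask)[0])               # [1 3] -> channels chosen for interpolation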
def comprz_dB(xx,fr=0.05):
""" Compress signal in such a way that is logarithmic but also avoids negative values """
x = numpy.copy(xx)
sh = xx.shape
x = x.reshape(-1)
x = comprz(x)
x = numpy.setdiff1d(x,numpy.array([0.0]))
xs = numpy.sort(x)
mini = xs[int(fr*len(x))]
mn = numpy.ones_like(xx)*mini
xx = numpy.where(xx > mini, xx, mn)
xx = xx.reshape(sh)
return(10.0*numpy.log10(xx))
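A rough sketch of the floor-before-log idea above (comprz itself is not shown here, so this covers only the zero handling): np.setdiff1d drops exact zeros, the fr-quantile of what remains becomes the floor, and everything below it is clipped before converting to dB.

import numpy as np

def floor_then_db(xx, fr=0.05):
    x = np.setdiff1d(xx.ravel(), np.array([0.0]))   # discard exact zeros (also deduplicates)
    mini = np.sort(x)[int(fr * len(x))]             # approximate fr-quantile used as the floor
    return 10.0 * np.log10(np.maximum(xx, mini))

sig = np.array([[0.0, 1e-6, 1e-3], [1e-2, 1e-1, 1.0]])
print(floor_then_db(sig))   # the zero entry is floored to 1e-6 before the log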
def random_balanced_partitions(data, first_size, labels, random=np.random):
"""Split data into a balanced random partition and the rest
Partition the `data` array into two random partitions, using
the `labels` array (of equal size) to guide the choice of
elements of the first returned array.
Example:
    random_balanced_partitions(['a', 'b', 'c'], 2, [3, 5, 5])
    # Both labels 3 and 5 need to be represented once, so
# the result can be either (['a', 'b'], ['c']) or
# (['a', 'c'], ['b']) but not (['b', 'c'], ['a']).
Args:
data (ndarray): data to be split
first_size (int): size of the first partition
        labels (ndarray): labels according to which the balancing is done
random (RandomState): source of randomness
Return:
tuple of two ndarrays
"""
assert len(data) == len(labels)
classes, class_counts = np.unique(labels, return_counts=True)
assert len(classes) <= 10000, "surprisingly many classes: {}".format(len(classes))
assert first_size % len(classes) == 0, "not divisible: {}/{}".format(first_size, len(classes))
assert np.all(class_counts >= first_size // len(classes)), "not enough examples of some class"
idxs_per_class = [np.nonzero(labels == klass)[0] for klass in classes]
chosen_idxs_per_class = [
random.choice(idxs, first_size // len(classes), replace=False)
for idxs in idxs_per_class
]
first_idxs = np.concatenate(chosen_idxs_per_class)
second_idxs = np.setdiff1d(np.arange(len(labels)), first_idxs)
assert first_idxs.shape == (first_size,)
assert second_idxs.shape == (len(data) - first_size,)
return data[first_idxs], data[second_idxs]
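A usage sketch matching the docstring example above, assuming the function as defined here:

import numpy as np

data = np.array(['a', 'b', 'c'])
labels = np.array([3, 5, 5])
first, rest = random_balanced_partitions(data, 2, labels,
                                          random=np.random.RandomState(0))
print(first, rest)   # e.g. ['a' 'c'] ['b'] -- one example of each label in `first`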
def least_squares_multipliers(self, x):
"""Compute least-squares multipliers estimates."""
al_model = self.model
slack_model = self.model.model
m = slack_model.m
n = slack_model.n
lim = max(2 * m, 2 * n)
J = slack_model.jop(x)
# Determine which bounds are active to remove appropriate columns of J
on_bound = self.get_active_bounds(x,
slack_model.Lvar,
slack_model.Uvar)
        free_vars = np.setdiff1d(np.arange(n, dtype=int), on_bound)
        Jred = ReducedJacobian(J, np.arange(m, dtype=int), free_vars)
g = slack_model.grad(x) - J.T * al_model.pi
lsqr = LSQRSolver(Jred.T)
lsqr.solve(g[free_vars], itnlim=lim)
if lsqr.optimal:
al_model.pi += lsqr.x.copy()
else:
self.log.debug("lsqr failed to converge")
return
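A plain-numpy sketch of the reduction step (not the original LSQRSolver/ReducedJacobian machinery): variables sitting on an active bound are removed with np.setdiff1d and the multiplier correction solves a least-squares problem on the remaining columns of the Jacobian.

import numpy as np

n, m = 5, 3
J = np.random.randn(m, n)             # constraint Jacobian
g = np.random.randn(n)                # gradient residual
on_bound = np.array([1, 4])           # hypothetical variables at an active bound

free_vars = np.setdiff1d(np.arange(n), on_bound)    # [0 2 3]
Jred = J[:, free_vars]                              # reduced Jacobian
# min over dpi of || Jred.T @ dpi - g[free_vars] ||
dpi, *_ = np.linalg.lstsq(Jred.T, g[free_vars], rcond=None)
print(dpi.shape)   # (3,) -- one correction per multiplier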
def _open_file(self):
# only apply _skip property at the beginning of the trajectory
skip = self._data_source._skip[self._itraj] + self.skip if self._t == 0 else 0
nt = self._data_source._skip[self._itraj] + self._data_source._lengths[self._itraj]
# calculate an index set, which rows to skip (includes stride)
skip_rows = np.empty(0)
if skip > 0:
skip_rows = np.zeros(nt)
skip_rows[:skip] = np.arange(skip)
if not self.uniform_stride:
all_frames = np.arange(nt)
skip_rows = np.setdiff1d(all_frames, self.ra_indices_for_traj(self._itraj), assume_unique=True)
elif self.stride > 1:
all_frames = np.arange(nt)
if skip_rows is not None:
wanted_frames = np.arange(skip, nt, self.stride)
else:
wanted_frames = np.arange(0, nt, self.stride)
skip_rows = np.setdiff1d(
all_frames, wanted_frames, assume_unique=True)
self._skip_rows = skip_rows
try:
fh = open(self._data_source.filenames[self._itraj],
mode=self._data_source.DEFAULT_OPEN_MODE)
self._file_handle = fh
except EnvironmentError:
            self._logger.exception("could not open file %s", self._data_source.filenames[self._itraj])
raise
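The skip_rows bookkeeping above boils down to "all frame indices minus the ones we want to read"; a small hypothetical example with both a skip and a stride:

import numpy as np

nt, skip, stride = 10, 2, 3
all_frames = np.arange(nt)
wanted_frames = np.arange(skip, nt, stride)              # [2 5 8]
skip_rows = np.setdiff1d(all_frames, wanted_frames,
                         assume_unique=True)             # rows the reader will drop
print(skip_rows)   # [0 1 3 4 6 7 9]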
def load_boston_data(prop=400/506):
from sklearn import datasets
boston = datasets.load_boston()
X, y = boston.data, boston.target
y = y[:, None]
ntrain = y.shape[0]
train_inds = npr.choice(range(ntrain), int(prop*ntrain), replace=False)
valid_inds = np.setdiff1d(range(ntrain), train_inds)
X_train, y_train = X[train_inds].copy(), y[train_inds].copy()
X_valid, y_valid = X[valid_inds].copy(), y[valid_inds].copy()
return X_train, y_train, X_valid, y_valid
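The same split pattern (random training indices, validation set as the np.setdiff1d complement) works for any dataset; a hypothetical sketch with synthetic data, since load_boston has been removed from recent scikit-learn releases:

import numpy as np
import numpy.random as npr

X = npr.randn(506, 13)                 # synthetic stand-in for the Boston features
y = npr.randn(506, 1)
ntrain = y.shape[0]

train_inds = npr.choice(range(ntrain), int(0.8 * ntrain), replace=False)
valid_inds = np.setdiff1d(range(ntrain), train_inds)     # everything not drawn for training
print(len(train_inds), len(valid_inds))                  # 404 102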
############################ Training Phase ############################
def _load_cv_data(self, list_files):
"""Load training and cross-validation sets."""
# Split files for training and validation sets
val_files = np.array_split(list_files, self.n_folds)
train_files = np.setdiff1d(list_files, val_files[self.fold_idx])
# Load a npz file
print "Load training set:"
data_train, label_train = self._load_npz_list_files(train_files)
print " "
print "Load validation set:"
data_val, label_val = self._load_npz_list_files(val_files[self.fold_idx])
print " "
# Reshape the data to match the input of the model - conv2d
data_train = np.squeeze(data_train)
data_val = np.squeeze(data_val)
data_train = data_train[:, :, np.newaxis, np.newaxis]
data_val = data_val[:, :, np.newaxis, np.newaxis]
# Casting
data_train = data_train.astype(np.float32)
label_train = label_train.astype(np.int32)
data_val = data_val.astype(np.float32)
label_val = label_val.astype(np.int32)
return data_train, label_train, data_val, label_val
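The file split at the top of this method is just "all files minus the current validation fold"; a hypothetical standalone example:

import numpy as np

list_files = np.array(["s01.npz", "s02.npz", "s03.npz", "s04.npz", "s05.npz"])
n_folds, fold_idx = 5, 2

val_files = np.array_split(list_files, n_folds)
train_files = np.setdiff1d(list_files, val_files[fold_idx])
print(val_files[fold_idx], train_files)
# ['s03.npz'] ['s01.npz' 's02.npz' 's04.npz' 's05.npz']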
def _load_cv_data(self, list_files):
"""Load sequence training and cross-validation sets."""
# Split files for training and validation sets
val_files = np.array_split(list_files, self.n_folds)
train_files = np.setdiff1d(list_files, val_files[self.fold_idx])
# Load a npz file
print "Load training set:"
data_train, label_train = self._load_npz_list_files(train_files)
print " "
print "Load validation set:"
data_val, label_val = self._load_npz_list_files(val_files[self.fold_idx])
print " "
return data_train, label_train, data_val, label_val