def confirmContent_NoOrder(self):
'''
Confirms that all information is the same except for the maxRange field.
Order is ignored
'''
sf = StationFinder()
stations = sf.findStation(unitCode = self.unitCode, distance = self.distance,
climateParameters = self.climateParameters, sdate = self.sdate, edate = self.edate)
test_data = numpy.array(stations._dumpMetaToList())
ref_data = []
    with open(Test_StationFinder.rootFolder + self.refFile, 'r') as refFile:
        r = csv.reader(refFile)
        for line in r:
            ref_data.append(line)
    ref_data = numpy.array(ref_data)
self.results = list(numpy.setdiff1d(ref_data[:,Test_StationFinder.testColumns]
,test_data[:,Test_StationFinder.testColumns]))
def confirmContent(self):
'''
Confirms that all information is the same, ignoring record order
'''
dr = StationDataRequestor()
wxData = dr.getDailyWxObservations(climateStations = self.climateStations,
climateParameters = self.climateParameters
,sdate = self.sdate, edate = self.edate)
wxData.export('temp.csv')
infile = open('temp.csv','r')
testData = infile.read()
refDataFile = open(Test_StationDataRequestor_getDailyWxObs.rootFolder + self.refDataFile, 'r')
refData = refDataFile.read()
infile.close()
refDataFile.close()
os.remove('temp.csv')
    self.result = list(numpy.setdiff1d(refData.split('\n'), testData.split('\n')))
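# The confirmContent tests in this collection all follow the same pattern: export the
# freshly requested data to CSV, read it back alongside a stored reference file, and
# apply numpy.setdiff1d to the split lines so the comparison ignores record order. A
# minimal, self-contained sketch of that pattern (the helper name and file paths are
# illustrative, not part of the test classes):
def _lines_missing_from_test(ref_path, test_path):
    import numpy
    with open(ref_path, 'r') as f:
        ref_lines = f.read().split('\n')
    with open(test_path, 'r') as f:
        test_lines = f.read().split('\n')
    # An empty result means every reference line also appears in the test export.
    return list(numpy.setdiff1d(ref_lines, test_lines))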
def confirmContent(self):
'''
Confirms that all information is the same, ignoring record order
'''
dr = StationDataRequestor()
wxData = dr.getMonthlyWxSummaryByYear(climateStations = self.climateStations,
climateParameters = self.climateParameters, reduceCodes = self.reduceCodes
,sdate = self.sdate, edate = self.edate, maxMissing = self.maxMissing,
includeNormals = self.includeNormals, includeNormalDepartures = self.includeNormalDepartures)
wxData.export('temp.csv')
infile = open('temp.csv','r')
testData = infile.read()
refDataFile = open(Test_StationDataRequestor_getMonthlyWxSummaryByYear.rootFolder + self.refDataFile, 'r')
refData = refDataFile.read()
infile.close()
refDataFile.close()
os.remove('temp.csv')
    self.result = list(numpy.setdiff1d(refData.split('\n'), testData.split('\n')))
def confirmContent(self):
'''
Confirms that all information is the same, ignoring record order
'''
dr = StationDataRequestor()
wxData = dr.getYearlyWxSummary(climateStations = self.climateStations,
climateParameters = self.climateParameters, reduceCodes = self.reduceCodes
,sdate = self.sdate, edate = self.edate, maxMissing = self.maxMissing,
includeNormals = self.includeNormals, includeNormalDepartures = self.includeNormalDepartures)
wxData.export('temp.csv')
infile = open('temp.csv','r')
testData = infile.read()
refDataFile = open(Test_StationDataRequestor_getYearlyWxSummary.rootFolder + self.refDataFile, 'r')
refData = refDataFile.read()
infile.close()
refDataFile.close()
os.remove('temp.csv')
    self.result = list(numpy.setdiff1d(refData.split('\n'), testData.split('\n')))
def check_multiplication_dims(dims, N, M, vidx=False, without=False):
dims = array(dims, ndmin=1)
if len(dims) == 0:
dims = arange(N)
if without:
dims = setdiff1d(range(N), dims)
if not np.in1d(dims, arange(N)).all():
raise ValueError('Invalid dimensions')
P = len(dims)
sidx = np.argsort(dims)
sdims = dims[sidx]
if vidx:
        if M > N:
            raise ValueError('More multiplicands than dimensions')
        if M != N and M != P:
            raise ValueError('Invalid number of multiplicands')
if P == M:
vidx = sidx
else:
vidx = sdims
return sdims, vidx
else:
return sdims
def _annotate_epochs(self, threshes, epochs):
"""Get essential annotations for epochs given thresholds."""
ch_type = _get_ch_type_from_picks(self.picks, epochs.info)[0]
drop_log, bad_sensor_counts = self._vote_bad_epochs(epochs)
interp_channels, fix_log = self._get_epochs_interpolation(
epochs, drop_log=drop_log, ch_type=ch_type)
(bad_epochs_idx, sorted_epoch_idx,
n_epochs_drop) = self._get_bad_epochs(
bad_sensor_counts, ch_type=ch_type)
bad_epochs_idx = np.sort(bad_epochs_idx)
good_epochs_idx = np.setdiff1d(np.arange(len(epochs)),
bad_epochs_idx)
return (drop_log, bad_sensor_counts, interp_channels, fix_log,
bad_epochs_idx, good_epochs_idx)
def has_approx_support(m, m_hat, prob=0.01):
"""Returns 1 if model selection error is less than or equal to prob rate,
0 else.
NOTE: why does np.nonzero/np.flatnonzero create so much problems?
"""
m_nz = np.flatnonzero(np.triu(m, 1))
m_hat_nz = np.flatnonzero(np.triu(m_hat, 1))
upper_diagonal_mask = np.flatnonzero(np.triu(np.ones(m.shape), 1))
not_m_nz = np.setdiff1d(upper_diagonal_mask, m_nz)
intersection = np.in1d(m_hat_nz, m_nz) # true positives
not_intersection = np.in1d(m_hat_nz, not_m_nz) # false positives
true_positive_rate = 0.0
if len(m_nz):
true_positive_rate = 1. * np.sum(intersection) / len(m_nz)
true_negative_rate = 1. - true_positive_rate
false_positive_rate = 0.0
if len(not_m_nz):
false_positive_rate = 1. * np.sum(not_intersection) / len(not_m_nz)
return int(np.less_equal(true_negative_rate + false_positive_rate, prob))
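# A quick sanity check of has_approx_support on a tiny 3x3 support pattern (the
# matrices below are made-up inputs, not from the original test suite): an estimate
# with identical off-diagonal support passes, while one with a spurious off-diagonal
# entry fails.
import numpy as np
m = np.array([[1.0, 0.5, 0.0],
              [0.5, 1.0, 0.3],
              [0.0, 0.3, 1.0]])
print(has_approx_support(m, m.copy()))         # 1: same support, no selection error
print(has_approx_support(m, np.ones((3, 3))))  # 0: the (0, 2) entry is a false positive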
def transform(self, y):
"""Transform labels to normalized encoding.
Parameters
----------
y : array-like of shape [n_samples]
Target values.
Returns
-------
y : array-like of shape [n_samples]
"""
y = column_or_1d(y, warn=True)
classes = np.unique(y)
if len(np.intersect1d(classes, self.classes_)) < len(classes):
diff = np.setdiff1d(classes, self.classes_)
self.classes_ = np.hstack((self.classes_, diff))
    return np.searchsorted(self.classes_, y)
def initialize(self, coordinates): #not run until after first selection!
# set up mines
# randomly place mines anywhere *except* first selected location AND surrounding cells
# so that first selection is always a 0
# weird, yes, but that's how the original minesweeper worked
availableCells = range(self.totalCells)
selected = coordinates[0]*self.dim2 + coordinates[1]
offLimits = np.array([selected-self.dim2-1, selected-self.dim2, selected-self.dim2+1, selected-1, selected, selected+1, selected+self.dim2-1, selected+self.dim2, selected+self.dim2+1]) #out of bounds is ok
availableCells = np.setdiff1d(availableCells, offLimits)
self.nMines = np.minimum(self.nMines, len(availableCells)) #in case there are fewer remaining cells than mines to place
minesFlattened = np.zeros([self.totalCells])
minesFlattened[np.random.choice(availableCells, self.nMines, replace=False)] = 1
self.mines = minesFlattened.reshape([self.dim1, self.dim2])
# set up neighbors
for i in range(self.dim1):
for j in range(self.dim2):
nNeighbors = 0
for k in range(-1, 2):
if i + k >= 0 and i + k < self.dim1:
for l in range(-1, 2):
if j + l >= 0 and j + l < self.dim2 and (k != 0 or l != 0):
nNeighbors += self.mines[i + k, j + l]
self.neighbors[i, j] = nNeighbors
#done
self.initialized = True
def find_neighbours(self, idx, features):
"""
Finds the neighbours of the given point which are at a maximum distance
of self.eps from it.
:param idx: Index of the current point
:param features: Dataset, array-like object of shape
(nb_samples, nb_features)
:returns: List containing the indexes of the neighbours
"""
data = features[np.setdiff1d(np.arange(features.shape[0]), idx)]
distances = self.get_distances(features[idx], data)
same_cluster = [idx]
for i, dist in enumerate(distances.tolist()[0]):
real_index = i if i < idx else i + 1
if dist <= self.eps:
same_cluster.append(real_index)
return same_cluster
def load_co2_data(prop=0.8):
    from sklearn.datasets import fetch_mldata
data = fetch_mldata('mauna-loa-atmospheric-co2').data
X = data[:, [1]]
y = data[:, 0]
y = y[:, None]
X = X.astype(np.float64)
ntrain = y.shape[0]
train_inds = npr.choice(range(ntrain), int(prop*ntrain), replace=False)
valid_inds = np.setdiff1d(range(ntrain), train_inds)
X_train, y_train = X[train_inds].copy(), y[train_inds].copy()
X_valid, y_valid = X[valid_inds].copy(), y[valid_inds].copy()
return X_train, y_train, X_valid, y_valid
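# load_co2_data splits the samples by drawing random training indices and recovering
# the validation indices with np.setdiff1d. A minimal standalone sketch of that split
# (the function name, seed, and 0.8 proportion are illustrative assumptions):
import numpy as np

def random_split_indices(n_samples, train_prop=0.8, seed=0):
    rng = np.random.RandomState(seed)
    train_idx = rng.choice(n_samples, int(train_prop * n_samples), replace=False)
    valid_idx = np.setdiff1d(np.arange(n_samples), train_idx)  # everything not in train
    return train_idx, valid_idx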
############################ Training & Visualizing ############################
def transform(self, y):
"""Transform labels to normalized encoding.
Parameters
----------
y : array-like of shape [n_samples]
Target values.
Returns
-------
y : array-like of shape [n_samples]
"""
check_is_fitted(self, 'classes_')
y = column_or_1d(y.ravel(), warn=True)
classes = np.unique(y)
if isinstance(classes[0], np.float64):
classes = classes[np.isfinite(classes)]
_check_numpy_unicode_bug(classes)
if len(np.intersect1d(classes, self.classes_)) < len(classes):
diff = np.setdiff1d(classes, self.classes_)
print(self.classes_)
raise ValueError("y contains new labels: %s" % str(diff))
return np.searchsorted(self.classes_, y).reshape(-1, 1)
def inverse_transform(self, y):
"""Transform labels back to original encoding.
Parameters
----------
y : numpy array of shape [n_samples]
Target values.
Returns
-------
y : numpy array of shape [n_samples]
"""
check_is_fitted(self, 'classes_')
diff = np.setdiff1d(y, np.arange(len(self.classes_)))
    if len(diff):
raise ValueError("y contains new labels: %s" % str(diff))
y = np.asarray(y)
return self.classes_[y]
def sortclusters(CoPaM, Mc, minGenesinClust = 11):
Mcloc = np.array(Mc)
[Np, K] = Mcloc.shape
largerThanMax = np.max(Mcloc) + 1
Cf = np.zeros(K, dtype=int) - 1
for i in range(Np-1,-1,-1):
C = np.argsort(Mcloc[i])[::-1]
M = Mcloc[i,C]
        Cf[np.all([M >= minGenesinClust, Cf == -1], axis=0)] = C[np.all([M >= minGenesinClust, Cf == -1], axis=0)]
        if i > 0:
            Mcloc[i-1, Cf[Cf != -1]] = largerThanMax
Cf[Cf==-1] = np.setdiff1d(np.arange(K), Cf)
return np.array(CoPaM)[:, Cf]
# Clustering helper function for the parallel loop
def test_fix_types():
"""Test fixing of channel types
"""
for fname, change in ((hp_fif_fname, True), (test_fif_fname, False),
(ctf_fname, False)):
raw = Raw(fname)
mag_picks = pick_types(raw.info, meg='mag')
other_picks = np.setdiff1d(np.arange(len(raw.ch_names)), mag_picks)
# we don't actually have any files suffering from this problem, so
# fake it
if change:
for ii in mag_picks:
raw.info['chs'][ii]['coil_type'] = FIFF.FIFFV_COIL_VV_MAG_T2
orig_types = np.array([ch['coil_type'] for ch in raw.info['chs']])
raw.fix_mag_coil_types()
new_types = np.array([ch['coil_type'] for ch in raw.info['chs']])
if not change:
assert_array_equal(orig_types, new_types)
else:
assert_array_equal(orig_types[other_picks], new_types[other_picks])
assert_true((orig_types[mag_picks] != new_types[mag_picks]).all())
assert_true((new_types[mag_picks] ==
FIFF.FIFFV_COIL_VV_MAG_T3).all())
def get_finished_jobs(job_ids):
"""Get a list of finished job ids for the given list of jobs
Keyword arguments:
    job_ids -- list of jobs that shall be checked
"""
data = get_qstat_as_df()
finished_jobs = []
if len(data) == 0:
return job_ids
ids_in_data = data[data["JOBID"].isin(job_ids)]
finished_jobs = np.setdiff1d(job_ids, ids_in_data["JOBID"])
return np.array(finished_jobs)
def fix_predictions(self, X, predictions, bias):
idxs_users_missing, idxs_items_missing = self.indices_missing
# Set average when neither the user nor the item exist
g_avg = bias['globalAvg']
common_indices = np.intersect1d(idxs_users_missing, idxs_items_missing)
predictions[common_indices] = g_avg
# Only users exist (return average + {dUser})
if 'dUsers' in bias:
missing_users = np.setdiff1d(idxs_users_missing, common_indices)
if len(missing_users) > 0:
user_idxs = X[missing_users, self.order[0]]
predictions[missing_users] = g_avg + bias['dUsers'][user_idxs]
# Only items exist (return average + {dItem})
if 'dItems' in bias:
missing_items = np.setdiff1d(idxs_items_missing, common_indices)
if len(missing_items) > 0:
item_idxs = X[missing_items, self.order[1]]
predictions[missing_items] = g_avg + bias['dItems'][item_idxs]
return predictions
def measure_background(image, Fibers, width=30, niter=3, order=3):
t = []
a,b = image.shape
ygrid,xgrid = np.indices(image.shape)
ygrid = 1. * ygrid.ravel() / a
xgrid = 1. * xgrid.ravel() / b
image = image.ravel()
s = np.arange(a*b)
for fiber in Fibers:
t.append(fiber.D*fiber.yind + fiber.xind)
t = np.hstack(t)
t = np.array(t, dtype=int)
ind = np.setdiff1d(s,t)
mask = np.zeros((a*b))
mask[ind] = 1.
mask[ind] = 1.-is_outlier(image[ind])
sel = np.where(mask==1.)[0]
for i in xrange(niter):
V = polyvander2d(xgrid[sel],ygrid[sel],[order,order])
sol = np.linalg.lstsq(V, image[sel])[0]
vals = np.dot(V,sol) - image[sel]
sel = sel[~is_outlier(vals)]
V = polyvander2d(xgrid,ygrid,[order,order])
back = np.dot(V, sol).reshape(a,b)
return back
def _parallel_predict_log_proba(estimators, estimators_features, X, n_classes):
"""Private function used to compute log probabilities within a job."""
n_samples = X.shape[0]
log_proba = np.empty((n_samples, n_classes))
log_proba.fill(-np.inf)
    all_classes = np.arange(n_classes, dtype=int)
for estimator, features in zip(estimators, estimators_features):
log_proba_estimator = estimator.predict_log_proba(X[:, features])
if n_classes == len(estimator.classes_):
log_proba = np.logaddexp(log_proba, log_proba_estimator)
else:
log_proba[:, estimator.classes_] = np.logaddexp(
log_proba[:, estimator.classes_],
log_proba_estimator[:, range(len(estimator.classes_))])
missing = np.setdiff1d(all_classes, estimator.classes_)
log_proba[:, missing] = np.logaddexp(log_proba[:, missing],
-np.inf)
return log_proba
def transform(self, y):
"""Transform labels to normalized encoding.
Parameters
----------
y : array-like of shape [n_samples]
Target values.
Returns
-------
y : array-like of shape [n_samples]
"""
check_is_fitted(self, 'classes_')
y = column_or_1d(y, warn=True)
classes = np.unique(y)
_check_numpy_unicode_bug(classes)
if len(np.intersect1d(classes, self.classes_)) < len(classes):
diff = np.setdiff1d(classes, self.classes_)
raise ValueError("y contains new labels: %s" % str(diff))
return np.searchsorted(self.classes_, y)
def inverse_transform(self, y):
"""Transform labels back to original encoding.
Parameters
----------
y : numpy array of shape [n_samples]
Target values.
Returns
-------
y : numpy array of shape [n_samples]
"""
check_is_fitted(self, 'classes_')
diff = np.setdiff1d(y, np.arange(len(self.classes_)))
    if len(diff):
raise ValueError("y contains new labels: %s" % str(diff))
y = np.asarray(y)
return self.classes_[y]
def _match_info(self):
"""
Helper function to create match info
"""
assert self.matches is not None, 'No matches yet!'
self.matches = {
'match_pairs' : self.matches,
'treated' : np.unique(list(self.matches.keys())),
'control' : np.unique(list(self.matches.values()))
}
self.matches['dropped'] = np.setdiff1d(list(range(self.nobs)),
np.append(self.matches['treated'], self.matches['control']))
def confirmAsciiGrid(self):
gr = GridRequestor()
data = gr.getGrids(sdate = self.sdate, edate = self.edate,
unitCode = self.unitCode, distance = self.distance,
climateParameters = self.climateParameters, duration = self.duration)
testDataFile = data.export()[0]
testFile = open(testDataFile,'r')
testData = testFile.read()
testFile.close()
os.remove(testDataFile)
os.remove(testDataFile[:-3] + 'prj')
refDataFile = open(Test_GridRequestor.rootFolder + self.refDataFile,'r')
refData = refDataFile.read()
refDataFile.close()
    self.result = list(numpy.setdiff1d(refData.split('\n'), testData.split('\n')))
def prepare(data_valid):
print(data_valid.shape)
batch = data_valid.shape[0]
N = data_valid.shape[1]
data_invalid = np.random.randint(0,2,(batch,N),dtype=np.int8)
print(data_valid.shape,data_invalid.shape)
ai = data_invalid.view([('', data_invalid.dtype)] * N)
av = data_valid.view ([('', data_valid.dtype)] * N)
data_invalid = np.setdiff1d(ai, av).view(data_valid.dtype).reshape((-1, N))
return prepare_binary_classification_data(data_valid, data_invalid)
# default values
def prepare(data):
num = len(data)
dim = data.shape[1]//2
print(data.shape,num,dim)
pre, suc = data[:,:dim], data[:,dim:]
suc_invalid = np.copy(suc)
random.shuffle(suc_invalid)
data_invalid = np.concatenate((pre,suc_invalid),axis=1)
ai = data_invalid.view([('', data_invalid.dtype)] * 2*dim)
av = data.view ([('', data.dtype)] * 2*dim)
data_invalid = np.setdiff1d(ai, av).view(data_invalid.dtype).reshape((-1, 2*dim))
inputs = np.concatenate((data,data_invalid),axis=0)
outputs = np.concatenate((np.ones((num,1)),np.zeros((len(data_invalid),1))),axis=0)
print(inputs.shape,outputs.shape)
io = np.concatenate((inputs,outputs),axis=1)
random.shuffle(io)
train_n = int(2*num*0.9)
train, test = io[:train_n], io[train_n:]
train_in, train_out = train[:,:dim*2], train[:,dim*2:]
test_in, test_out = test[:,:dim*2], test[:,dim*2:]
return train_in, train_out, test_in, test_out
# default values
def set_difference(a, b):
assert a.shape[1:] == b.shape[1:]
a = a.copy()
b = b.copy()
a_v = a.view([('', a.dtype)] * a.shape[1])
b_v = b.view([('', b.dtype)] * b.shape[1])
return np.setdiff1d(a_v, b_v).view(a.dtype).reshape((-1, a.shape[1]))
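# A quick illustration of set_difference above: it returns the rows of `a` that do not
# occur as rows of `b` (the small arrays are made-up inputs). Because it goes through
# numpy.setdiff1d on a structured view, the result comes back sorted and de-duplicated.
import numpy as np
a = np.array([[0, 1], [1, 0], [1, 1]])
b = np.array([[1, 0], [0, 0]])
print(set_difference(a, b))  # [[0 1]
                             #  [1 1]]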
def test_indices(self, test_indices):
if test_indices is None:
self._train_indices = np.arange(0, len(self.y))
else:
self._test_indices = test_indices
self._train_indices = np.setdiff1d(np.arange(0, len(self.y)), self.test_indices)
def setdiff1d(ar1, ar2, assume_unique=False):
"""
Find the set difference of two arrays.
Return the sorted, unique values in `ar1` that are not in `ar2`.
Parameters
----------
ar1 : array_like
Input array.
ar2 : array_like
Input comparison array.
assume_unique : bool
If True, the input arrays are both assumed to be unique, which
can speed up the calculation. Default is False.
Returns
-------
setdiff1d : ndarray
Sorted 1D array of values in `ar1` that are not in `ar2`.
See Also
--------
numpy.lib.arraysetops : Module with a number of other functions for
performing set operations on arrays.
Examples
--------
>>> a = np.array([1, 2, 3, 2, 4, 1])
>>> b = np.array([3, 4, 5, 6])
>>> np.setdiff1d(a, b)
array([1, 2])
"""
if assume_unique:
ar1 = np.asarray(ar1).ravel()
else:
ar1 = unique(ar1)
ar2 = unique(ar2)
return ar1[in1d(ar1, ar2, assume_unique=True, invert=True)]
def stabilize(self, prior_columns, percent):
"""
    This forces some of the previously active columns to remain active in order to
    maintain the given percent of column overlap between time steps. Always call
    this between compute and learn!
"""
# num_active = (len(self.columns) + len(prior_columns)) / 2
num_active = len(self.columns)
overlap = self.columns.overlap(prior_columns)
stabile_columns = int(round(num_active * overlap))
target_columns = int(round(num_active * percent))
add_columns = target_columns - stabile_columns
if add_columns <= 0:
return
eligable_columns = np.setdiff1d(prior_columns.flat_index, self.columns.flat_index)
eligable_excite = self.raw_excitment[eligable_columns]
selected_col_nums = np.argpartition(-eligable_excite, add_columns-1)[:add_columns]
selected_columns = eligable_columns[selected_col_nums]
selected_index = np.unravel_index(selected_columns, self.columns.dimensions)
# Learn. Note: selected columns will learn twice. The previously
# active segments learn now, the current most excited segments in the
# method SP.learn().
    # Or learn not at all if there's a bug in my code...
# if self.multisegment:
# if hasattr(self, 'prior_segment_excitement'):
# segment_excitement = self.prior_segment_excitement[selected_index]
# seg_idx = np.argmax(segment_excitement, axis=-1)
# self.proximal.learn_outputs(input_sdr=input_sdr,
# output_sdr=selected_index + (seg_idx,))
# self.prev_segment_excitement = self.segment_excitement
# else:
# 1/0
self.columns.flat_index = np.concatenate([self.columns.flat_index, selected_columns])
def sample_weights(self, idxs, scores):
N = len(scores)
S1 = scores[np.setdiff1d(np.arange(N), idxs)].sum()
return np.tile([float(N), float(S1)], (len(idxs), 1))