def main():
    parser = generate_parser()
    args = parser.parse_args()
    infile1 = h5py.File(args.input1, 'r')
    infile2 = h5py.File(args.input2, 'r')
    # keep only the resolutions and chromosomes present in both input files
    resolutions = numpy.intersect1d(infile1['resolutions'][...], infile2['resolutions'][...])
    chroms = numpy.intersect1d(infile1['chromosomes'][...], infile2['chromosomes'][...])
    data1 = load_data(infile1, chroms, resolutions)
    data2 = load_data(infile2, chroms, resolutions)
    infile1.close()
    infile2.close()
    results = {}
    # key the correlation by the two file basenames, without the '.quasar' extension
    key1 = args.input1.split('/')[-1].replace('.quasar', '')
    key2 = args.input2.split('/')[-1].replace('.quasar', '')
    results[(key1, key2)] = correlate_samples(data1, data2)
    for resolution in data1.keys():
        for chromo in chroms:
            plt.scatter(data1[resolution][chromo][1].flatten(),
                        data2[resolution][chromo][1].flatten(),
                        alpha=0.1, color='red')
            # save the figure before calling show()
            plt.savefig(args.output + '.res' + str(resolution) + '.chr' + chromo + '.pdf')
            plt.show()
Example source code for numpy.intersect1d() in Python
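Before the per-project snippets, a minimal sketch (not taken from any of the projects below) of what numpy.intersect1d returns: the sorted, de-duplicated set of values present in both inputs.

import numpy as np

a = np.array([3, 1, 2, 3])
b = np.array([2, 3, 4])

print(np.intersect1d(a, b))   # [2 3] -- common values, returned sorted and de-duplicated

# If both inputs are already unique, assume_unique=True skips the internal unique() calls:
print(np.intersect1d(np.unique(a), b, assume_unique=True))   # [2 3]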
File: two_sigma_financial_modelling.py · Project: PortfolioTimeSeriesAnalysis · Author: MizioAnd
def clean_data(self, df, is_with_MICE=0):
    df = df.copy()
    if df.isnull().sum().sum() > 0:
        if is_with_MICE:
            # Imputation using MICE
            numerical_features_names = self.extract_numerical_features(df)
            df.loc[:, tuple(numerical_features_names)] = self.estimate_by_mice(df[numerical_features_names])
        else:
            if any(tuple(df.columns == 'y')):
                df = df.dropna()
            else:
                df = df.dropna(axis=1)
    TwoSigmaFinModTools._feature_names_num = pd.Series(data=np.intersect1d(
        TwoSigmaFinModTools._feature_names_num.values, df.columns), dtype=object)
    TwoSigmaFinModTools._numerical_feature_names = TwoSigmaFinModTools.extract_numerical_features(df)
    return df
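The intersect1d call above keeps only the remembered numerical feature names that are still present as DataFrame columns after rows or columns were dropped. A minimal sketch of that pattern, using hypothetical names in place of TwoSigmaFinModTools._feature_names_num and the cleaned frame:

import numpy as np
import pandas as pd

# hypothetical stand-ins for the stored feature list and the cleaned DataFrame
remembered_features = pd.Series(['open', 'close', 'volume', 'spread'], dtype=object)
df = pd.DataFrame({'open': [1.0], 'close': [2.0], 'y': [0.5]})

surviving = pd.Series(np.intersect1d(remembered_features.values, df.columns), dtype=object)
print(surviving.tolist())  # ['close', 'open'] -- note that intersect1d also sorts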
def RecursionTree(self, node, x, x_rows, y):
    # no samples routed to this node: nothing to predict
    if len(x_rows) <= 0:
        return
    # leaf node: assign its label to all routed samples
    if node.HasChildren() == False:
        y[x_rows] = node.GetLabel()
        # logger.debug('predict label:%d, x_rows:%s', node.GetLabel(), x_rows)
        return
    feature = node.GetSplitAttr()
    rest_x_row = np.array(x_rows, dtype=int)
    for (value, child) in node.GetChildren():
        # rows whose split-feature value matches this branch
        new_x_row = np.intersect1d(x_rows, np.where(x[:, feature] == value)[0])
        rest_x_row = np.setxor1d(rest_x_row, new_x_row, True)
        self.RecursionTree(child, x, new_x_row, y)
    # rows whose feature value matched no branch fall back to the majority class of x_rows
    y[rest_x_row] = self.Classify(y, x_rows)
def uintersect1d(arr1, arr2, assume_unique=False):
    """Find the sorted unique elements of the two input arrays.

    A wrapper around numpy.intersect1d that preserves units. All input arrays
    must have the same units. See the documentation of numpy.intersect1d for
    full details.

    Examples
    --------
    >>> A = yt.YTArray([1, 2, 3], 'cm')
    >>> B = yt.YTArray([2, 3, 4], 'cm')
    >>> uintersect1d(A, B)
    YTArray([ 2., 3.]) cm
    """
    v = np.intersect1d(arr1, arr2, assume_unique=assume_unique)
    v = validate_numpy_wrapper_units(v, [arr1, arr2])
    return v
def uunion1d(arr1, arr2):
    """Find the union of two arrays.

    A wrapper around numpy.union1d that preserves units. All input arrays
    must have the same units. See the documentation of numpy.union1d for
    full details.

    Examples
    --------
    >>> A = yt.YTArray([1, 2, 3], 'cm')
    >>> B = yt.YTArray([2, 3, 4], 'cm')
    >>> uunion1d(A, B)
    YTArray([ 1., 2., 3., 4.]) cm
    """
    v = np.union1d(arr1, arr2)
    v = validate_numpy_wrapper_units(v, [arr1, arr2])
    return v
def clean_data(self, df):
    df = df.copy()
    is_with_MICE = 1
    if df.isnull().sum().sum() > 0:
        if is_with_MICE:
            # Imputation using MICE
            numerical_features_names = self.extract_numerical_features(df)
            df.loc[:, tuple(numerical_features_names)] = self.estimate_by_mice(df[numerical_features_names])
        else:
            if any(tuple(df.columns == 'SalePrice')):
                df = df.dropna()
            else:
                df = df.dropna(axis=1)
    HousePrices._feature_names_num = pd.Series(data=np.intersect1d(HousePrices._feature_names_num.values,
                                                                   df.columns), dtype=object)
    return df
File: model2.py · Project: movie-recommendation-using-RBM · Author: pinkeshbadjatiya
def getNui(self):
    # Generates Users who are trusted by user u and have rated product i
    sz = len(self.R_train_ui) + len(self.R_test_ui)
    for u, i in self.R_train_ui:
        # Users who have rated product i
        rat_u = self.R_train[np.where(self.R_train[:, 1] == i), 0]
        # Users trusted by u
        trust_u = self.W[np.where(self.W[:, 0] == u), 1]
        self.V[u, i] = np.intersect1d(rat_u, trust_u)
        print(u, i, self.V[u, i])
    for u, i in self.R_test_ui:
        # Users who have rated product i
        rat_u = self.R_train[np.where(self.R_train[:, 1] == i), 0]
        # Users trusted by u
        trust_u = self.W[np.where(self.W[:, 0] == u), 1]
        self.V[u, i] = np.intersect1d(rat_u, trust_u)
        print(u, i, self.V[u, i])
def transform(self, y):
    """Transform labels to normalized encoding.

    Parameters
    ----------
    y : array-like of shape [n_samples]
        Target values.

    Returns
    -------
    y : array-like of shape [n_samples]
    """
    y = column_or_1d(y, warn=True)
    classes = np.unique(y)
    if len(np.intersect1d(classes, self.classes_)) < len(classes):
        diff = np.setdiff1d(classes, self.classes_)
        self.classes_ = np.hstack((self.classes_, diff))
    return np.searchsorted(self.classes_, y)[0]
File: test_range.py · Project: PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda · Author: SignalMedia
def test_intersection(self):
    # intersect with Int64Index
    other = Index(np.arange(1, 6))
    result = self.index.intersection(other)
    expected = np.sort(np.intersect1d(self.index.values, other.values))
    self.assert_numpy_array_equal(result, expected)

    result = other.intersection(self.index)
    expected = np.sort(np.asarray(np.intersect1d(self.index.values,
                                                 other.values)))
    self.assert_numpy_array_equal(result, expected)

    # intersect with increasing RangeIndex
    other = RangeIndex(1, 6)
    result = self.index.intersection(other)
    expected = np.sort(np.intersect1d(self.index.values, other.values))
    self.assert_numpy_array_equal(result, expected)

    # intersect with decreasing RangeIndex
    other = RangeIndex(5, 0, -1)
    result = self.index.intersection(other)
    expected = np.sort(np.intersect1d(self.index.values, other.values))
    self.assert_numpy_array_equal(result, expected)
def getAccRel(sim, dst):
    # For how many samples is the most similar one also the one with the smallest distance?
    # Relaxed formulation: of the most similar ones (if more than one is equally similar),
    # one is among those with the smallest distance (if more than one is equally far away).
    maxSim = numpy.max(sim, axis=1)
    minDst = numpy.min(dst, axis=1)
    nSamp = sim.shape[0]
    nCorrect = 0.
    nCorrectClass = 0.
    for i in range(nSamp):
        maxSimIdx, = (sim[i, :] == maxSim[i]).nonzero()
        minDstIdx, = (dst[i, :] == minDst[i]).nonzero()
        if len(numpy.intersect1d(maxSimIdx, minDstIdx, assume_unique=True)) > 0:
            nCorrect += 1.
        if numpy.min(sim[i, minDstIdx]) > -2.:
            nCorrectClass += 1.
    acc = nCorrect / nSamp
    # classification accuracy: for what fraction is the closest one from the correct class
    classAcc = nCorrectClass / nSamp
    return (acc, classAcc)
def find_neighbors(self, _quadlist):
    import numpy as np
    neighbors = np.array([])
    edges = [self.vertex_ids[[0, 1]],
             self.vertex_ids[[1, 2]],
             self.vertex_ids[[2, 3]],
             self.vertex_ids[[3, 0]]]
    for e in edges:
        has_vertex1 = np.where(_quadlist == e[0])[0]
        has_vertex2 = np.where(_quadlist == e[1])[0]
        same_edge = np.intersect1d(has_vertex1, has_vertex2)
        neighbor = same_edge[same_edge != self.quad_id]
        neighbors = np.append(neighbors, neighbor)
    return neighbors.astype(int)
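The neighbor search above works by intersecting the row indices returned by two np.where calls: a quad is adjacent if it contains both endpoints of an edge. A standalone sketch of that idea with a hypothetical quad list:

import numpy as np

# hypothetical quad list: each row holds the four vertex ids of one quad
quads = np.array([[0, 1, 2, 3],
                  [1, 4, 5, 2],
                  [6, 7, 8, 9]])
edge = (1, 2)  # look for quads sharing the edge between vertices 1 and 2

has_v1 = np.where(quads == edge[0])[0]    # rows containing vertex 1 -> [0 1]
has_v2 = np.where(quads == edge[1])[0]    # rows containing vertex 2 -> [0 1]
shared = np.intersect1d(has_v1, has_v2)   # rows containing both endpoints -> [0 1]
print(shared)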
def resolve_inside_vertex(self, _local_v_id, _dc_quads):
    # all quads connected to the vertex will be removed anyhow
    delete_quads_list = self.manifold_vertex_quad_ids[_local_v_id]
    new_quads_list = []
    # change all references to the old manifold vertex into references to the right child vertex
    for q_id in delete_quads_list:
        quad = _dc_quads[q_id]
        tmp = quad.index(self.v_idx[_local_v_id])
        new_quad = list(quad)
        for child_id in range(2):
            if np.intersect1d(quad, self.v_children_connection_idx[_local_v_id][child_id]).size != 0:
                new_quad[tmp] = self.v_children_idx[_local_v_id][child_id]
                break
        new_quads_list.append(new_quad)
    return new_quads_list, delete_quads_list
def __init__(self, obs, est, minval=None):
    # Check input
    assert len(obs) == len(est), \
        "obs and est need to have the same length. " \
        "len(obs)=%d, len(est)=%d" % (len(obs), len(est))
    # only remember those entries which have both valid observations
    # AND estimates
    ix = np.intersect1d(util._idvalid(obs, minval=minval),
                        util._idvalid(est, minval=minval))
    self.n = len(ix)
    if self.n == 0:
        print("WARNING: No valid pairs of observed and "
              "estimated available for ErrorMetrics!")
        self.obs = np.array([])
        self.est = np.array([])
    else:
        self.obs = obs[ix]
        self.est = est[ix]
    self.resids = self.est - self.obs
def inFootprint(self, pixels, nside=None):
    """
    Open each valid filename for the set of pixels and determine the set
    of subpixels with valid data.
    """
    if numpy.isscalar(pixels):
        pixels = numpy.array([pixels])
    if nside is None:
        nside = self.nside_likelihood

    inside = numpy.zeros(len(pixels), dtype='bool')
    if not self.nside_catalog:
        catalog_pix = [0]
    else:
        catalog_pix = superpixel(pixels, nside, self.nside_catalog)
        catalog_pix = numpy.intersect1d(catalog_pix, self.catalog_pixels)

    for filenames in self.filenames[catalog_pix]:
        #logger.debug("Loading %s"%filenames['mask_1'])
        subpix_1, val_1 = ugali.utils.skymap.readSparseHealpixMap(filenames['mask_1'], 'MAGLIM', construct_map=False)
        #logger.debug("Loading %s"%filenames['mask_2'])
        subpix_2, val_2 = ugali.utils.skymap.readSparseHealpixMap(filenames['mask_2'], 'MAGLIM', construct_map=False)
        subpix = numpy.intersect1d(subpix_1, subpix_2)
        superpix = numpy.unique(ugali.utils.skymap.superpixel(subpix, self.nside_pixel, nside))
        inside |= numpy.in1d(pixels, superpix)
    return inside
def getCatalogPixels(self):
    """
    Return the catalog pixels spanned by this ROI.
    """
    filenames = self.config.getFilenames()
    nside_catalog = self.config.params['coords']['nside_catalog']
    nside_pixel = self.config.params['coords']['nside_pixel']

    # All possible catalog pixels spanned by the ROI
    superpix = ugali.utils.skymap.superpixel(self.pixels, nside_pixel, nside_catalog)
    superpix = numpy.unique(superpix)
    # Only catalog pixels that exist in catalog files
    pixels = numpy.intersect1d(superpix, filenames['pix'].compressed())
    return pixels
############################################################
def transform(self, y):
    """Transform labels to normalized encoding.

    Parameters
    ----------
    y : array-like of shape [n_samples]
        Target values.

    Returns
    -------
    y : array-like of shape [n_samples]
    """
    check_is_fitted(self, 'classes_')
    y = column_or_1d(y.ravel(), warn=True)
    classes = np.unique(y)
    if isinstance(classes[0], np.float64):
        classes = classes[np.isfinite(classes)]
    _check_numpy_unicode_bug(classes)
    if len(np.intersect1d(classes, self.classes_)) < len(classes):
        diff = np.setdiff1d(classes, self.classes_)
        print(self.classes_)
        raise ValueError("y contains new labels: %s" % str(diff))
    return np.searchsorted(self.classes_, y).reshape(-1, 1)
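The intersect1d/setdiff1d pair above is a common way to detect labels at transform time that were never seen during fit. A minimal standalone sketch of that check (the variable names here are illustrative, not from the project):

import numpy as np

fitted_classes = np.array(['cat', 'dog'])          # classes learned during fit
new_labels = np.unique(np.array(['dog', 'fox']))   # classes seen at transform time

# if some new labels are missing from the fitted classes,
# the intersection is smaller than the set of new labels
if len(np.intersect1d(new_labels, fitted_classes)) < len(new_labels):
    unseen = np.setdiff1d(new_labels, fitted_classes)
    print("unseen labels:", unseen)                # ['fox']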
File: defect_handling.py · Project: nature_methods_multicut_pipeline · Author: ilastik
def find_matching_indices(array, value_list):
    assert isinstance(array, np.ndarray)
    assert isinstance(value_list, np.ndarray)
    # reimplemented in cython for speed # TODO !!! include in conda package
    try:
        from cython_tools import find_matching_indices_fast
        return find_matching_indices_fast(array.astype('uint32'), value_list.astype('uint32'))
    except ImportError:
        print("WARNING: Could not find cython function, using slow numpy version")
        indices = []
        for i, row in enumerate(array):
            if np.intersect1d(row, value_list).size:
                indices.append(i)
        return np.array(indices)
#
# Modified Adjacency
#
# TODO reactivate
def join(l1, l2):  # join two sorted lists
    # n1 = len(l1)
    # n2 = len(l2)
    # #l1.sort()
    # #l2.sort()
    # p1 = 0
    # p2 = 0
    # ret = []
    # while p1 < n1 and p2 < n2:
    #     if l1[p1] < l2[p2]:
    #         p1 += 1
    #     elif l1[p1] > l2[p2]:
    #         p2 += 1
    #     else:
    #         ret.append(l1[p1])
    #         p1 += 1
    #         p2 += 1
    return np.intersect1d(l1, l2)
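The commented-out merge loop above and np.intersect1d agree when both inputs are sorted lists of unique values; intersect1d additionally sorts and de-duplicates arbitrary input. A small sketch, assuming sorted, duplicate-free inputs so that assume_unique=True is a safe speed-up:

import numpy as np

l1 = [1, 3, 5, 7, 9]
l2 = [3, 4, 5, 9]

# both lists are already sorted and duplicate-free,
# so the pairwise merge and intersect1d return the same values
print(np.intersect1d(l1, l2))                      # [3 5 9]
print(np.intersect1d(l1, l2, assume_unique=True))  # [3 5 9], skips the internal unique() calls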
def add_observation(self, observation, indx=None):
    """
    Parameters
    ----------
    indx : ints
        The indices of the healpixel map that have been observed by observation
    """
    if observation['filter'][0] in self.filtername:
        self.feature[indx] += 1
        if self.mask_indx is not None:
            overlap = np.intersect1d(indx, self.mask_indx)
            if overlap.size > 0:
                # interpolate over those pixels that are DD fields.
                # XXX. Do I need to kdtree this? Maybe make a dict on init
                # to lookup the N closest non-masked pixels, then do weighted average.
                pass
def replaceCompWithExpansion(self, uid=0, xSS=None,
                             keysToSetNonExtraZero=['sumLogPiRemVec']):
    ''' Replace existing component with expanded set of statistics.

    Post Condition
    --------------
    Values associated with uid are removed.
    All entries of provided xSS are added last in index order.
    '''
    if not np.intersect1d(xSS.uids, self.uids).size == 0:
        raise ValueError("Cannot expand with same uids.")
    for key in self._Fields._FieldDims:
        if key in keysToSetNonExtraZero:
            arr = getattr(self._Fields, key)
            arr.fill(0)
    if hasattr(xSS, 'mUIDPairs'):
        assert not self.hasMergeTerms()
        self.setMergeUIDPairs(xSS.mUIDPairs)
    self.insertComps(xSS)
    self.removeComp(uid=uid)
def _stc_src_sel(src, stc):
    """ Select the vertex indices of a source space using a source estimate
    """
    if isinstance(stc, VolSourceEstimate):
        vertices = [stc.vertices]
    else:
        vertices = stc.vertices
    if not len(src) == len(vertices):
        raise RuntimeError('Mismatch between number of source spaces (%s) and '
                           'STC vertices (%s)' % (len(src), len(vertices)))
    src_sels = []
    offset = 0
    for s, v in zip(src, vertices):
        src_sel = np.intersect1d(s['vertno'], v)
        src_sel = np.searchsorted(s['vertno'], src_sel)
        src_sels.append(src_sel + offset)
        offset += len(s['vertno'])
    src_sel = np.concatenate(src_sels)
    return src_sel
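The intersect1d followed by searchsorted above is a common idiom: first find which vertex numbers are shared, then convert those shared values into positions within the (sorted) vertex array. A small sketch with made-up vertex numbers:

import numpy as np

vertno = np.array([2, 5, 9, 14, 20])   # sorted vertex numbers of one source space
wanted = np.array([5, 14, 33])         # vertices used by the source estimate

shared = np.intersect1d(vertno, wanted)        # [ 5 14] -- values present in both
positions = np.searchsorted(vertno, shared)    # [1 3]   -- their indices inside vertno
print(shared, positions)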
def fix_predictions(self, X, predictions, bias):
    idxs_users_missing, idxs_items_missing = self.indices_missing

    # Set average when neither the user nor the item exist
    g_avg = bias['globalAvg']
    common_indices = np.intersect1d(idxs_users_missing, idxs_items_missing)
    predictions[common_indices] = g_avg

    # Only users exist (return average + {dUser})
    if 'dUsers' in bias:
        missing_users = np.setdiff1d(idxs_users_missing, common_indices)
        if len(missing_users) > 0:
            user_idxs = X[missing_users, self.order[0]]
            predictions[missing_users] = g_avg + bias['dUsers'][user_idxs]

    # Only items exist (return average + {dItem})
    if 'dItems' in bias:
        missing_items = np.setdiff1d(idxs_items_missing, common_indices)
        if len(missing_items) > 0:
            item_idxs = X[missing_items, self.order[1]]
            predictions[missing_items] = g_avg + bias['dItems'][item_idxs]

    return predictions
def test_shuffle_kfold():
    # Check the indices are shuffled properly
    kf = KFold(3)
    kf2 = KFold(3, shuffle=True, random_state=0)
    kf3 = KFold(3, shuffle=True, random_state=1)

    X = np.ones(300)

    all_folds = np.zeros(300)
    for (tr1, te1), (tr2, te2), (tr3, te3) in zip(
            kf.split(X), kf2.split(X), kf3.split(X)):
        for tr_a, tr_b in combinations((tr1, tr2, tr3), 2):
            # Assert that there is no complete overlap
            assert_not_equal(len(np.intersect1d(tr_a, tr_b)), len(tr1))

        # Set all test indices in successive iterations of kf2 to 1
        all_folds[te2] = 1

    # Check that all indices are returned in the different test folds
    assert_equal(sum(all_folds), 300)
def test_stratified_shuffle_split_iter():
    ys = [np.array([1, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3, 3]),
          np.array([0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3]),
          np.array([0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2]),
          np.array([1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4]),
          np.array([-1] * 800 + [1] * 50)
          ]

    for y in ys:
        sss = StratifiedShuffleSplit(6, test_size=0.33,
                                     random_state=0).split(np.ones(len(y)), y)
        for train, test in sss:
            assert_array_equal(np.unique(y[train]), np.unique(y[test]))

            # Checks if folds keep classes proportions
            p_train = (np.bincount(np.unique(y[train],
                                             return_inverse=True)[1]) /
                       float(len(y[train])))
            p_test = (np.bincount(np.unique(y[test],
                                            return_inverse=True)[1]) /
                      float(len(y[test])))
            assert_array_almost_equal(p_train, p_test, 1)
            assert_equal(y[train].size + y[test].size, y.size)
            assert_array_equal(np.lib.arraysetops.intersect1d(train, test), [])
def test_stratified_shuffle_split_iter():
    ys = [np.array([1, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3, 3]),
          np.array([0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3]),
          np.array([0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2]),
          np.array([1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4]),
          np.array([-1] * 800 + [1] * 50)
          ]

    for y in ys:
        sss = cval.StratifiedShuffleSplit(y, 6, test_size=0.33,
                                          random_state=0)
        for train, test in sss:
            assert_array_equal(np.unique(y[train]), np.unique(y[test]))

            # Checks if folds keep classes proportions
            p_train = (np.bincount(np.unique(y[train], return_inverse=True)[1])
                       / float(len(y[train])))
            p_test = (np.bincount(np.unique(y[test], return_inverse=True)[1])
                      / float(len(y[test])))
            assert_array_almost_equal(p_train, p_test, 1)
            assert_equal(y[train].size + y[test].size, y.size)
            assert_array_equal(np.intersect1d(train, test), [])
def transform(self, y):
    """Transform labels to normalized encoding.

    Parameters
    ----------
    y : array-like of shape [n_samples]
        Target values.

    Returns
    -------
    y : array-like of shape [n_samples]
    """
    check_is_fitted(self, 'classes_')
    y = column_or_1d(y, warn=True)

    classes = np.unique(y)
    _check_numpy_unicode_bug(classes)
    if len(np.intersect1d(classes, self.classes_)) < len(classes):
        diff = np.setdiff1d(classes, self.classes_)
        raise ValueError("y contains new labels: %s" % str(diff))
    return np.searchsorted(self.classes_, y)
def common_ids(descendent_ids, ancestor_ids, threshold=0.5):
    r"""
    Determine if at least a given fraction of ancestor's member particles
    are in the descendent.

    Parameters
    ----------
    descendent_ids : list of ints
        Member ids for first halo.
    ancestor_ids : list of int
        Member ids for second halo.
    threshold : float, optional
        Critical fraction of ancestor's particles ending up in the
        descendent to be considered a true ancestor. Default: 0.5.

    Returns
    -------
    True or False
    """
    common = np.intersect1d(descendent_ids, ancestor_ids)
    return common.size > threshold * ancestor_ids.size
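For illustration, a hedged usage sketch of the fraction test above, with made-up particle ids (in the real pipeline the id arrays come from the halo catalogs):

import numpy as np

ancestor_ids = np.array([10, 11, 12, 13])        # 4 particles in the ancestor halo
descendent_ids = np.array([11, 12, 13, 40, 41])  # 3 of them survive in the descendent

common = np.intersect1d(descendent_ids, ancestor_ids)  # [11 12 13]
print(common.size > 0.5 * ancestor_ids.size)           # True: 3/4 > 0.5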
def _nn_pose_fill(valid):
    """
    Looks up closest True for each False and returns
    indices for fill-in-lookup
    In: [True, False, True, ... , False, True]
    Out: [0, 0, 2, ..., 212, 212]
    """

    valid_inds, = np.where(valid)
    invalid_inds, = np.where(~valid)

    all_inds = np.arange(len(valid))
    all_inds[invalid_inds] = -1

    for j in range(10):
        fwd_inds = valid_inds + j
        bwd_inds = valid_inds - j

        # Forward fill
        invalid_inds, = np.where(all_inds < 0)
        fwd_fill_inds = np.intersect1d(fwd_inds, invalid_inds)
        all_inds[fwd_fill_inds] = all_inds[fwd_fill_inds - j]

        # Backward fill
        invalid_inds, = np.where(all_inds < 0)
        if not len(invalid_inds):
            break
        bwd_fill_inds = np.intersect1d(bwd_inds, invalid_inds)
        all_inds[bwd_fill_inds] = all_inds[bwd_fill_inds + j]

        # Check if any missing
        invalid_inds, = np.where(all_inds < 0)
        if not len(invalid_inds):
            break

    # np.set_printoptions(threshold=np.nan)
    # print valid.astype(np.int)
    # print np.array_str(all_inds)
    # print np.where(all_inds < 0)
    return all_inds
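A small hedged check of the behavior described in the docstring (assuming the function is importable as _nn_pose_fill): each False position should end up pointing at the index of a nearby True position.

import numpy as np

valid = np.array([True, False, True, True, False, False, True])
filled = _nn_pose_fill(valid)
print(filled)  # should print [0 0 2 3 3 6 6] -- invalid slots borrow a neighboring valid index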
def __indexs_select_pk0(self, pk0_roi0_h0, pk0_roi0_h1, pk0_roi1_h0, pk0_roi1_h1):
    # get indexs of selected waveforms in pk0
    spk_in_line = np.apply_along_axis(self.__in_select_line, 1, self.waveforms_pk0, pk0_roi0_h0, pk0_roi0_h1)
    changed_index = np.where(spk_in_line == True)[0]
    changed_index = np.array(changed_index, dtype=np.int32)

    spk_in_line1 = np.apply_along_axis(self.__in_select_line, 1, self.waveforms_pk0, pk0_roi1_h0, pk0_roi1_h1)
    changed_index1 = np.where(spk_in_line1 == True)[0]
    changed_index1 = np.array(changed_index1, dtype=np.int32)

    changed_index = np.intersect1d(changed_index, changed_index1)
    return changed_index + self.indexs_pk0[0]