import numpy as np
import pandas as pd


def intersect_and_sort_samples(sample_metadata, feature_table):
    '''Return the input tables restricted to shared samples, with rows
    ordered identically.

    Parameters
    ----------
    sample_metadata : pd.DataFrame
        Contingency table with rows = samples, columns = metadata.
    feature_table : pd.DataFrame
        Contingency table with rows = samples, columns = features.

    Returns
    -------
    sample_metadata, feature_table : pd.DataFrame, pd.DataFrame
        Input tables with unshared samples removed and rows ordered identically.

    Raises
    ------
    ValueError
        If no shared samples are found.
    '''
    shared_samples = np.intersect1d(sample_metadata.index, feature_table.index)
    if shared_samples.size == 0:
        raise ValueError('There are no shared samples between the feature '
                         'table and the sample metadata. Ensure that you have '
                         'passed the correct files.')
    elif (shared_samples.size == sample_metadata.shape[0] ==
          feature_table.shape[0]):
        s_metadata = sample_metadata.copy()
        s_features = feature_table.copy()
    else:
        s_metadata = sample_metadata.loc[np.in1d(sample_metadata.index,
                                                 shared_samples), :].copy()
        s_features = feature_table.loc[np.in1d(feature_table.index,
                                               shared_samples), :].copy()
    # Reindex the features to match the metadata's row order.
    return s_metadata, s_features.loc[s_metadata.index, :]
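A minimal usage sketch for the function above, with hypothetical sample IDs
(assumes the numpy/pandas imports at the top of the snippet):

metadata = pd.DataFrame({'ph': [6.8, 7.2, 7.9]}, index=['s1', 's2', 's3'])
features = pd.DataFrame({'otu1': [10, 0], 'otu2': [3, 7]}, index=['s3', 's1'])
md, ft = intersect_and_sort_samples(metadata, features)
print(md.index.tolist())  # ['s1', 's3'] -- only the shared samples remain
print(ft.index.tolist())  # ['s1', 's3'] -- reordered to match md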
More Python np.intersect1d() examples from open-source projects:
def intersect_sim(array_1, array_2):
    """Calculate the similarity of two arrays as the ratio
    intersection size / union size (the Jaccard index).
    """
    sim = float(np.intersect1d(array_1, array_2).size) / \
        float(np.union1d(array_1, array_2).size)
    return sim
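A quick check of the Jaccard-style similarity above, with values chosen for
illustration (assumes numpy imported as np):

a = np.array([1, 2, 3, 4])
b = np.array([3, 4, 5])
print(intersect_sim(a, b))  # 2 shared / 5 in the union -> 0.4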
preprocessing.py, from the Epileptic-Seizure-Prediction project (author: cedricsimar):
def split_data_in_epochs(self, data, epoch_length_sec, stride_sec):
    """
    Split the signal into fixed-length epochs that contain no dropouts.
    """
    sig = np.array(data, dtype=np.float32)  # shape [240000 x 16]
    sig_epochs = []
    # SAMPLING_FREQUENCY is a module-level constant in the original project.
    samples_in_epoch = epoch_length_sec * SAMPLING_FREQUENCY
    stride_shift = stride_sec * SAMPLING_FREQUENCY
    # Compute dropout indices. Dropouts occur at the same position across all
    # channels, so a sample is a dropout iff channels 0 and 1 are both zero.
    drop_indices_c0 = np.where(sig[:, 0] == 0)[0]
    drop_indices_c1 = np.where(sig[:, 1] == 0)[0]
    drop_indices = np.intersect1d(drop_indices_c0, drop_indices_c1)
    drop_indices = np.append(drop_indices, len(sig))  # add the index of the last element
    window_start = 0
    for window_end in drop_indices:
        epoch_start = window_start
        epoch_end = epoch_start + samples_in_epoch
        while epoch_end < window_end:
            sig_epochs.append(sig[epoch_start:epoch_end, :])
            epoch_start += stride_shift
            epoch_end += stride_shift
        window_start = window_end + 1
    return sig_epochs
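A sketch of calling the method above outside its class (self is unused, so
None is passed); SAMPLING_FREQUENCY must exist as a module constant, and the
400 Hz used here is an illustrative value only:

SAMPLING_FREQUENCY = 400
rng = np.random.default_rng(0)
data = rng.standard_normal((240000, 16)).astype(np.float32)
data[1000:1100, :] = 0.0  # simulated dropout across all channels
epochs = split_data_in_epochs(None, data, epoch_length_sec=30, stride_sec=10)
print(len(epochs), epochs[0].shape)  # each epoch is (30 * 400, 16)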
def Inversion(Qsca, Qabs, wavelength, diameter, nMin=1, nMax=3, kMin=0.001,
              kMax=1, scatteringPrecision=0.010, absorptionPrecision=0.010,
              spaceSize=120, interp=2):
    # Relies on fastMieQ() from the same module and zoom() from scipy.ndimage.
    # The error lambda is only used by the commented-out line below.
    error = lambda measured, calculated: np.abs((calculated-measured)/measured)
    nRange = np.linspace(nMin, nMax, spaceSize)
    kRange = np.logspace(np.log10(kMin), np.log10(kMax), spaceSize)
    scaSpace = np.zeros((spaceSize, spaceSize))
    absSpace = np.zeros((spaceSize, spaceSize))
    for ni, n in enumerate(nRange):
        for ki, k in enumerate(kRange):
            _derp = fastMieQ(n+(1j*k), wavelength, diameter)
            scaSpace[ni][ki] = _derp[0]
            absSpace[ni][ki] = _derp[1]
    if interp is not None:
        nRange = zoom(nRange, interp)
        kRange = zoom(kRange, interp)
        scaSpace = zoom(scaSpace, interp)
        absSpace = zoom(absSpace, interp)
    scaSolutions = np.where(np.logical_and(Qsca*(1-scatteringPrecision) < scaSpace,
                                           scaSpace < Qsca*(1+scatteringPrecision)))
    absSolutions = np.where(np.logical_and(Qabs*(1-absorptionPrecision) < absSpace,
                                           absSpace < Qabs*(1+absorptionPrecision)))
    validScattering = nRange[scaSolutions[0]] + 1j*kRange[scaSolutions[1]]
    validAbsorption = nRange[absSolutions[0]] + 1j*kRange[absSolutions[1]]
    # A solution must match both the scattering and the absorption measurement.
    solution = np.intersect1d(validScattering, validAbsorption)
    # errors = [error()]
    return solution
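Note that np.intersect1d also handles complex arrays, which is what makes the
final intersection of scattering and absorption candidates above possible; a
tiny illustration with made-up refractive indices n + ik:

cand_sca = np.array([1.5 + 0.01j, 1.6 + 0.02j])
cand_abs = np.array([1.6 + 0.02j, 1.7 + 0.30j])
print(np.intersect1d(cand_sca, cand_abs))  # [1.6+0.02j]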
def Inversion_SD(Bsca, Babs, wavelength, dp, ndp, nMin=1, nMax=3, kMin=0,
                 kMax=1, scatteringPrecision=0.001, absorptionPrecision=0.001,
                 spaceSize=40, interp=2):
    # Size-distribution variant of Inversion(); relies on coerceDType() and
    # fastMie_SD() from the same module, plus zoom() from scipy.ndimage.
    dp = coerceDType(dp)
    ndp = coerceDType(ndp)
    nRange = np.linspace(nMin, nMax, spaceSize)
    kRange = np.linspace(kMin, kMax, spaceSize)
    scaSpace = np.zeros((spaceSize, spaceSize))
    absSpace = np.zeros((spaceSize, spaceSize))
    for ni, n in enumerate(nRange):
        for ki, k in enumerate(kRange):
            _derp = fastMie_SD(n+(1j*k), wavelength, dp, ndp)
            scaSpace[ni][ki] = _derp[0]
            absSpace[ni][ki] = _derp[1]
    if interp is not None:
        nRange = zoom(nRange, interp)
        kRange = zoom(kRange, interp)
        scaSpace = zoom(scaSpace, interp)
        absSpace = zoom(absSpace, interp)
    scaSolutions = np.where(np.logical_and(Bsca*(1-scatteringPrecision) < scaSpace,
                                           scaSpace < Bsca*(1+scatteringPrecision)))
    absSolutions = np.where(np.logical_and(Babs*(1-absorptionPrecision) < absSpace,
                                           absSpace < Babs*(1+absorptionPrecision)))
    validScattering = nRange[scaSolutions[0]] + 1j*kRange[scaSolutions[1]]
    validAbsorption = nRange[absSolutions[0]] + 1j*kRange[absSolutions[1]]
    return np.intersect1d(validScattering, validAbsorption)
def _get_cluster_indices(self):
    self.clusters = _np.intersect1d(
        _np.where(self.density > self.min_density)[0],
        _np.where(self.delta > self.min_delta)[0],
        assume_unique=True).astype(_np.intc)
    self.nclusters = self.clusters.shape[0]
def _get_cluster_indices(self):
    # Variant of the method above, without the C-int cast.
    self.clusters = _np.intersect1d(
        _np.where(self.density > self.min_density)[0],
        _np.where(self.delta > self.min_delta)[0],
        assume_unique=True)
    self.ncl = self.clusters.shape[0]
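Both variants rely on np.where returning sorted, duplicate-free indices, so
assume_unique=True lets intersect1d skip its deduplication step; a small
illustration with made-up density/delta values:

density = np.array([0.2, 0.9, 0.7, 0.95])
delta = np.array([0.5, 0.05, 0.8, 0.9])
high_density = np.where(density > 0.6)[0]  # [1, 2, 3]
high_delta = np.where(delta > 0.4)[0]      # [0, 2, 3]
print(np.intersect1d(high_density, high_delta, assume_unique=True))  # [2 3]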
def select_regoin(img, vert, keep_shape=True, qmask=None):
    '''Select a rectangular region of an image.

    vert gives the cut positions as xs, xe, ys, ye
    (x_start, x_end, y_start, y_end), with img.shape == (dimy, dimx).
    If keep_shape, the output has the same shape as img and is zero outside
    the region; qmask optionally restricts the selection to pixels where
    qmask is False.
    '''
    import numpy as np
    xs, xe, ys, ye = vert
    if keep_shape:
        img_ = np.zeros_like(img)
        try:
            img_[ys:ye, xs:xe] = True
        except:  # 3-D (multi-channel) image
            img_[ys:ye, xs:xe, :] = True
        pixellist_ = np.where(img_.ravel())[0]
        if qmask is not None:
            # Keep only the selected pixels where qmask is False.
            b = np.where(qmask.flatten() == False)[0]
            pixellist_ = np.intersect1d(pixellist_, b)
        imgx = img_.ravel()
        imgx[pixellist_] = img.ravel()[pixellist_]
        imgx = imgx.reshape(img.shape)
    else:
        try:
            imgx = img[ys:ye, xs:xe]
        except:  # 3-D (multi-channel) image
            imgx = img[ys:ye, xs:xe, :]
    return imgx
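A minimal sketch of cutting a region out of a 100x100 image while keeping its
shape (hypothetical values; assumes numpy imported as np):

img = np.arange(100 * 100, dtype=float).reshape(100, 100)
crop = select_regoin(img, vert=(10, 40, 5, 25), keep_shape=True)
print(crop.shape)               # (100, 100): zeros outside the region
print(crop[5:25, 10:40].all())  # True: original (nonzero) values inside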
def read_crop(self, varname, kt):
    # Reassemble a cropped 2-D slice from per-process NetCDF files;
    # zeros, arange and intersect1d are assumed imported from numpy.
    z2d = zeros((len(self.yidx), len(self.xidx)))
    time = self.nc[0].variables['t'][kt]
    if kt >= self.nt:
        print('kt is out of range max(kt)=%i' % (self.nt-1))
    else:
        for proc in range(self.nbproc):
            varloc = self.nc[proc].variables[varname]
            print('\r %12s - kt =%i - proc = %i' % (varname, kt, proc), end='')
            ip, jp = proc % self.npx, proc // self.npx
            # Global indices owned by this process.
            iiglo = arange(ip*self.nxproc, (ip+1)*self.nxproc)
            jjglo = arange(jp*self.nyproc, (jp+1)*self.nyproc)
            # Overlap between the requested crop and this subdomain.
            ii = intersect1d(self.xidx, iiglo)
            jj = intersect1d(self.yidx, jjglo)
            if (len(ii) > 0) & (len(jj) > 0):
                # Bounds of the overlap, local to this process...
                i0, i1 = ii[0]-ip*self.nxproc, ii[-1]-ip*self.nxproc+1
                j0, j1 = jj[0]-jp*self.nyproc, jj[-1]-jp*self.nyproc+1
                zz = varloc[kt, j0:j1, i0:i1]
                # ...and relative to the output crop.
                i0, i1 = ii[0]-self.xidx[0], ii[-1]+1-self.xidx[0]
                j0, j1 = jj[0]-self.yidx[0], jj[-1]+1-self.yidx[0]
                z2d[j0:j1, i0:i1] = zz
    return time, z2d
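The core trick above is intersecting the requested global indices with the
index range owned by each process; illustrated standalone with hypothetical
sizes:

xidx = np.arange(100, 160)  # requested global columns
nxproc, ip = 64, 1          # say 64 columns per process, process number 1
iiglo = np.arange(ip * nxproc, (ip + 1) * nxproc)  # process 1 owns 64..127
ii = np.intersect1d(xidx, iiglo)
print(ii[0], ii[-1])        # 100 127: the overlap to read from process 1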
def get_data_stats(datasets):
    # datasets = [all, non-fraud, fraud] transaction DataFrames; the rate
    # rows divide by 366 days / 52 weeks / 12 months, i.e. one leap year.
    data_stats_cols = ['all', 'non-fraud', 'fraud']
    data_stats = pd.DataFrame(columns=data_stats_cols)
    data_stats.loc['transactions'] = [d.shape[0] for d in datasets]
    data_stats.loc['transactions/hour'] = [
        round(d['Local_Date'].apply(lambda x: x.hour).value_counts().sum() / 24 / 366, 2)
        for d in datasets]
    data_stats.loc['transactions/day'] = [
        round(d['Local_Date'].apply(lambda x: x.day).value_counts().sum() / 366, 2)
        for d in datasets]
    data_stats.loc['transactions/week'] = [
        round(d['Local_Date'].apply(lambda x: x.week).value_counts().sum() / 52, 2)
        for d in datasets]
    data_stats.loc['transactions/month'] = [
        round(d['Local_Date'].apply(lambda x: x.month).value_counts().sum() / 12, 2)
        for d in datasets]
    data_stats.loc['cards'] = [len(d["CardID"].unique()) for d in datasets]
    data_stats.loc['cards, single use'] = [sum(d["CardID"].value_counts() == 1) for d in datasets]
    data_stats.loc['cards, multi use'] = [sum(d["CardID"].value_counts() > 1) for d in datasets]
    cards_genuine = datasets[1]['CardID'].unique()
    cards_fraud = datasets[2]['CardID'].unique()
    # Fraction of fraud cards that also appear in the genuine set.
    data_stats.loc['fraud cards in genuine'] = [
        '-', '-', len(np.intersect1d(cards_genuine, cards_fraud)) / len(cards_fraud)]
    data_stats.loc['first transaction'] = [min(d["Global_Date"]).date() for d in datasets]
    data_stats.loc['last transaction'] = [max(d["Global_Date"]).date() for d in datasets]
    data_stats.loc['min amount'] = [min(d["Amount"]) for d in datasets]
    data_stats.loc['max amount'] = [max(d["Amount"]) for d in datasets]
    data_stats.loc['avg amount'] = [np.average(d["Amount"]) for d in datasets]
    data_stats.loc['num merchants'] = [len(d["MerchantID"].unique()) for d in datasets]
    data_stats.loc['countries'] = [len(d["Country"].unique()) for d in datasets]
    data_stats.loc['currencies'] = [len(d["Currency"].unique()) for d in datasets]
    data_stats.loc['min trans/card'] = [min(d["CardID"].value_counts()) for d in datasets]
    data_stats.loc['max trans/card'] = [max(d["CardID"].value_counts()) for d in datasets]
    data_stats.loc['avg trans/card'] = [np.average(d["CardID"].value_counts()) for d in datasets]
    return data_stats
def test_numpy_wrappers():
    a1 = YTArray([1, 2, 3], 'cm')
    a2 = YTArray([2, 3, 4, 5, 6], 'cm')
    catenate_answer = [1, 2, 3, 2, 3, 4, 5, 6]
    intersect_answer = [2, 3]
    union_answer = [1, 2, 3, 4, 5, 6]
    assert_array_equal(YTArray(catenate_answer, 'cm'), uconcatenate((a1, a2)))
    assert_array_equal(catenate_answer, np.concatenate((a1, a2)))
    assert_array_equal(YTArray(intersect_answer, 'cm'), uintersect1d(a1, a2))
    assert_array_equal(intersect_answer, np.intersect1d(a1, a2))
    assert_array_equal(YTArray(union_answer, 'cm'), uunion1d(a1, a2))
    assert_array_equal(union_answer, np.union1d(a1, a2))
def test_subhalos():
    ds = data_dir_load(g298)
    total_sub = 0
    total_int = 0
    for hid in range(0, ds.index.particle_count["Group"]):
        my_h = ds.halo("Group", hid)
        h_ids = my_h["ID"]
        for sid in range(int(my_h["subhalo_number"][0])):
            my_s = ds.halo("Subhalo", (my_h.particle_identifier, sid))
            total_sub += my_s["ID"].size
            total_int += np.intersect1d(h_ids, my_s["ID"]).size
    # Test that all subhalo particles are contained within
    # their parent group.
    assert_equal(total_sub, total_int)
def test_boolean_spheres_overlap():
    r"""Test to make sure that boolean objects (spheres, overlap)
    behave the way we expect.

    Test overlapping spheres.
    """
    ds = fake_amr_ds()
    sp1 = ds.sphere([0.45, 0.45, 0.45], 0.15)
    sp2 = ds.sphere([0.55, 0.55, 0.55], 0.15)
    # Get indices of both.
    i1 = sp1["index", "morton_index"]
    i2 = sp2["index", "morton_index"]
    # Make some booleans
    bo1 = sp1 & sp2
    bo2 = sp1 - sp2
    bo3 = sp1 | sp2
    bo4 = ds.union([sp1, sp2])
    bo5 = ds.intersection([sp1, sp2])
    # Now make sure the indices also behave as we expect.
    lens = np.intersect1d(i1, i2)
    apple = np.setdiff1d(i1, i2)
    both = np.union1d(i1, i2)
    b1 = bo1["index", "morton_index"]
    b1.sort()
    b2 = bo2["index", "morton_index"]
    b2.sort()
    b3 = bo3["index", "morton_index"]
    b3.sort()
    assert_array_equal(b1, lens)
    assert_array_equal(b2, apple)
    assert_array_equal(b3, both)
    b4 = bo4["index", "morton_index"]
    b4.sort()
    b5 = bo5["index", "morton_index"]
    b5.sort()
    assert_array_equal(b3, b4)
    assert_array_equal(b1, b5)
    bo6 = sp1 ^ sp2
    b6 = bo6["index", "morton_index"]
    b6.sort()
    assert_array_equal(b6, np.setxor1d(i1, i2))
def test_boolean_ellipsoids_overlap():
    r"""Test to make sure that boolean objects (ellipsoids, overlap)
    behave the way we expect.

    Test overlapping ellipsoids.
    """
    ds = fake_amr_ds()
    ell1 = ds.ellipsoid([0.45]*3, 0.05, 0.05, 0.05, np.array([0.1]*3), 0.1)
    ell2 = ds.ellipsoid([0.55]*3, 0.05, 0.05, 0.05, np.array([0.1]*3), 0.1)
    # Get indices of both.
    i1 = ell1["index", "morton_index"]
    i2 = ell2["index", "morton_index"]
    # Make some booleans
    bo1 = ell1 & ell2
    bo2 = ell1 - ell2
    bo3 = ell1 | ell2
    bo4 = ds.union([ell1, ell2])
    bo5 = ds.intersection([ell1, ell2])
    # Now make sure the indices also behave as we expect.
    overlap = np.intersect1d(i1, i2)
    diff = np.setdiff1d(i1, i2)
    both = np.union1d(i1, i2)
    b1 = bo1["index", "morton_index"]
    b1.sort()
    b2 = bo2["index", "morton_index"]
    b2.sort()
    b3 = bo3["index", "morton_index"]
    b3.sort()
    assert_array_equal(b1, overlap)
    assert_array_equal(b2, diff)
    assert_array_equal(b3, both)
    b4 = bo4["index", "morton_index"]
    b4.sort()
    b5 = bo5["index", "morton_index"]
    b5.sort()
    assert_array_equal(b3, b4)
    assert_array_equal(b1, b5)
    bo6 = ell1 ^ ell2
    b6 = bo6["index", "morton_index"]
    b6.sort()
    assert_array_equal(b6, np.setxor1d(i1, i2))
def find_relative_parentage(self, child):
    # Return two values: percent this halo gave to the other, and percent
    # of the other that comes from this halo.
    overlap = np.intersect1d(self.particle_ids, child.particle_ids).size
    of_child_from_me = float(overlap)/child.particle_ids.size
    of_mine_from_me = float(overlap)/self.particle_ids.size
    return of_child_from_me, of_mine_from_me
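The same fractions computed with plain arrays (hypothetical particle IDs):

parent_ids = np.array([1, 2, 3, 4, 5, 6])
child_ids = np.array([4, 5, 6, 7])
overlap = np.intersect1d(parent_ids, child_ids).size  # 3 shared particles
print(overlap / child_ids.size)   # 0.75 of the child came from this halo
print(overlap / parent_ids.size)  # 0.50 of this halo ended up in the child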
def _nonzero_intersection(m, m_hat):
    '''Count the number of nonzeros in and between m and m_hat.

    Returns
    -------
    m_nnz : number of nonzeros in m (w/o diagonal)
    m_hat_nnz : number of nonzeros in m_hat (w/o diagonal)
    intersection_nnz : number of nonzeros in the intersection of the
        supports of m and m_hat (w/o diagonal)
    '''
    n_features, _ = m.shape
    m_no_diag = m.copy()
    m_no_diag[np.diag_indices(n_features)] = 0
    m_hat_no_diag = m_hat.copy()
    m_hat_no_diag[np.diag_indices(n_features)] = 0
    m_hat_nnz = len(np.nonzero(m_hat_no_diag.flat)[0])
    m_nnz = len(np.nonzero(m_no_diag.flat)[0])
    intersection_nnz = len(np.intersect1d(
        np.nonzero(m_no_diag.flat)[0],
        np.nonzero(m_hat_no_diag.flat)[0]
    ))
    return m_nnz, m_hat_nnz, intersection_nnz
def _count_support_diff(m, m_hat):
    '''Count the size of the symmetric difference of the off-diagonal
    supports of m and m_hat.'''
    n_features, _ = m.shape
    m_no_diag = m.copy()
    m_no_diag[np.diag_indices(n_features)] = 0
    m_hat_no_diag = m_hat.copy()
    m_hat_no_diag[np.diag_indices(n_features)] = 0
    m_nnz = len(np.nonzero(m_no_diag.flat)[0])
    m_hat_nnz = len(np.nonzero(m_hat_no_diag.flat)[0])
    nnz_intersect = len(np.intersect1d(np.nonzero(m_no_diag.flat)[0],
                                       np.nonzero(m_hat_no_diag.flat)[0]))
    # |A| + |B| - 2|A intersect B| = size of the symmetric difference.
    return m_nnz + m_hat_nnz - (2 * nnz_intersect)
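A usage sketch for the two support helpers above, on small made-up matrices:

m = np.array([[1.0, 0.3, 0.0],
              [0.3, 1.0, 0.2],
              [0.0, 0.2, 1.0]])
m_hat = np.array([[1.0, 0.3, 0.1],
                  [0.3, 1.0, 0.0],
                  [0.1, 0.0, 1.0]])
print(_nonzero_intersection(m, m_hat))  # (4, 4, 2)
print(_count_support_diff(m, m_hat))    # 4: (0,2)/(2,0) gained, (1,2)/(2,1) lost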