def compute_confusion_matrix(self, yp, yr):
    '''
    Compute the confusion matrix
    '''
    # Initialization
    n = yp.size
    C = int(yr.max())
    self.confusion_matrix = sp.zeros((C, C))
    # Fill the confusion matrix (labels are assumed to start at 1)
    for i in range(n):
        self.confusion_matrix[yp[i].astype(int) - 1, yr[i].astype(int) - 1] += 1
    # Compute overall accuracy
    self.OA = sp.sum(sp.diag(self.confusion_matrix)) / n
    # Compute Kappa
    nl = sp.sum(self.confusion_matrix, axis=1)
    nc = sp.sum(self.confusion_matrix, axis=0)
    self.Kappa = ((n ** 2) * self.OA - sp.sum(nc * nl)) / (n ** 2 - sp.sum(nc * nl))
    # TBD: variance of the Kappa estimate
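A minimal usage sketch for the snippet above. It assumes the method is pasted at module level and that sp is the legacy import scipy as sp alias from the era when scipy re-exported NumPy's array functions (on a modern stack, import numpy as sp behaves the same here); the _Scores wrapper class is hypothetical.

import numpy as np

class _Scores(object):
    # Reuse the snippet above as a method of a throwaway class.
    compute_confusion_matrix = compute_confusion_matrix

yp = np.array([1, 2, 2, 1, 1])  # predicted labels, 1-based
yr = np.array([1, 2, 2, 1, 2])  # reference labels, 1-based
s = _Scores()
s.compute_confusion_matrix(yp, yr)
print(s.OA)     # 4 of 5 labels agree -> 0.8
print(s.Kappa)  # chance-corrected agreement, 8/13 here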
def glmnet_softmax(x):
    d = x.shape
    nas = scipy.any(scipy.isnan(x), axis=1)
    if scipy.any(nas):
        # Rows containing NaN get a NaN class; the rest are classified recursively.
        pclass = scipy.zeros([d[0], 1]) * scipy.NaN
        if scipy.sum(nas) < d[0]:
            pclass2 = glmnet_softmax(x[~nas, :])
            pclass[~nas] = pclass2
        result = pclass
    else:
        # Row-wise argmax reported as a 1-based class label
        # (column j corresponds to class j + 1).
        maxdist = x[:, 0].copy()  # start from the first column; copy to avoid mutating x
        pclass = scipy.ones([d[0], 1])
        for i in range(1, d[1]):
            t = x[:, i] > maxdist
            pclass[t] = i + 1  # 1-based labels, consistent with the default of 1
            maxdist[t] = x[t, i]
        result = pclass
    return result
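A quick sanity check of the corrected snippet (a sketch; same legacy scipy alias assumption as above, since scipy.any, scipy.isnan and friends resolve to the NumPy functions of the same name):

import numpy as np

x = np.array([[0.2, 0.7, 0.1],
              [0.9, 0.05, 0.05],
              [np.nan, 0.5, 0.5]])
print(glmnet_softmax(x))
# Rows without NaN get the 1-based argmax column (class 2, then class 1);
# the NaN row propagates NaN.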
#=========================
def auc(y, prob, w):
    if len(w) == 0:
        # Unweighted case: AUC via the Mann-Whitney U statistic.
        # Ties are broken with a small random perturbation.
        mindiff = scipy.amin(scipy.diff(scipy.unique(prob)))
        pert = scipy.random.uniform(0, mindiff / 3, prob.size)
        t, rprob = scipy.unique(prob + pert, return_inverse=True)
        n1 = scipy.sum(y, keepdims=True)
        n0 = y.shape[0] - n1
        # rprob holds 0-based ranks, hence n1*(n1 - 1)/2 rather than the
        # usual n1*(n1 + 1)/2 for 1-based ranks.
        u = scipy.sum(rprob[y == 1]) - n1 * (n1 - 1) / 2
        result = u / (n1 * n0)
    else:
        # Weighted case: accumulate the weight of negatives ranked below
        # each positive.
        op = scipy.argsort(prob)
        y = y[op]
        w = w[op]
        cw = scipy.cumsum(w)
        w1 = w[y == 1]
        cw1 = scipy.cumsum(w1)
        wauc = scipy.sum(w1 * (cw[y == 1] - cw1))
        sumw = cw1[-1]
        sumw = sumw * (cw[-1] - sumw)  # total positive weight * total negative weight
        result = wauc / sumw
    return result
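A toy check of both paths (arrays via NumPy; for these four scores, three of the four positive/negative pairs are ordered correctly, so the AUC is 0.75):

import numpy as np

y = np.array([0, 0, 1, 1])
prob = np.array([0.1, 0.4, 0.35, 0.8])
print(auc(y, prob, np.array([])))  # unweighted path -> 0.75 (1-element array)
print(auc(y, prob, np.ones(4)))    # weighted path with unit weights -> 0.75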
#=========================
def cvcompute(mat, weights, foldid, nlams):
    # Weighted per-fold means of cross-validation errors, plus the number
    # of folds contributing at each lambda.
    if len(weights.shape) > 1:
        weights = scipy.reshape(weights, [weights.shape[0], ])
    wisum = scipy.bincount(foldid, weights=weights)
    nfolds = scipy.amax(foldid) + 1
    outmat = scipy.ones([nfolds, mat.shape[1]]) * scipy.NaN
    good = scipy.zeros([nfolds, mat.shape[1]])
    mat[scipy.isinf(mat)] = scipy.NaN
    for i in range(nfolds):
        tf = foldid == i
        mati = mat[tf, ]
        wi = weights[tf, ]
        outmat[i, :] = wtmean(mati, wi)
        good[i, 0:nlams[i]] = 1
    N = scipy.sum(good, axis=0)  # folds with a fit at each lambda
    cvcpt = dict()
    cvcpt['cvraw'] = outmat
    cvcpt['weights'] = wisum
    cvcpt['N'] = N
    return cvcpt
# end of cvcompute
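A small shape-level sketch (assuming wtmean from the end of this page is in scope, with the scipy.isfinite/scipy.isnan fix applied there):

import numpy as np

mat = np.random.rand(6, 3)             # 6 CV samples x 3 lambda values
weights = np.ones(6)
foldid = np.array([0, 0, 1, 1, 2, 2])  # which fold each sample belongs to
nlams = [3, 3, 3]                      # lambdas actually fitted per fold
out = cvcompute(mat, weights, foldid, nlams)
print(out['cvraw'].shape)              # (3, 3): one row of mean errors per fold
print(out['N'])                        # [ 3.  3.  3.]: folds contributing per lambda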
#=========================
def _alignment(self, ssignal, ksignal):
    # Find the first sample in each signal whose amplitude exceeds roughly
    # 100 LSBs of 16-bit audio (samples are scanned with a stride of 2,
    # presumably because the signals are interleaved stereo).
    starta = 0
    for i in range(0, len(ssignal), 2):
        if ssignal[i] < -100 / 32767.0 or ssignal[i] > 100 / 32767.0:
            starta = i
            break
    startb = 0
    for i in range(0, len(ksignal), 2):
        if ksignal[i] < -100 / 32767.0 or ksignal[i] > 100 / 32767.0:
            startb = i
            break
    # Slide a 5000-sample window of ksignal around startb and keep the
    # offset with the smallest L2 distance to the reference window.
    start = starta - 100
    base = ssignal[start:start + 5000]
    small = 1000000
    index = 0
    for i in range(startb - 1000, startb - 1000 + 10000, 2):
        signal = ksignal[i:i + 5000]
        score = math.sqrt(sp.sum(sp.square((base - signal).astype(sp.float32))))
        if score < small:
            index = i
            small = score
    return start, index
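The core of the search above, as a standalone sketch (hypothetical helper name, NumPy only): slide a window over the second signal and keep the offset that minimizes the L2 distance to a fixed reference window.

import numpy as np

def best_offset(ref, sig, width=200, search=400):
    # Hypothetical mirror of the loop above, without the stereo stride.
    base = ref[:width]
    scores = [np.sqrt(np.sum((base - sig[i:i + width]) ** 2))
              for i in range(search)]
    return int(np.argmin(scores))

t = np.linspace(0, 1, 1000)
ref = np.sin(2 * np.pi * 5.0 * t)
sig = np.roll(ref, 37)        # delay the signal by 37 samples
print(best_offset(ref, sig))  # -> 37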
def __MR_W_D_matrix(self, img, labels):
    # Build the affinity matrix W (Gaussian kernel on superpixel mean
    # vectors, with entries flagged by the adjacency mask zeroed out)
    # and its degree matrix D.
    s = sp.amax(labels) + 1
    vect = self.__MR_superpixel_mean_vector(img, labels)
    adj = self.__MR_get_adj_loop(labels)
    W = sp.spatial.distance.squareform(sp.spatial.distance.pdist(vect))
    W = sp.exp(-1 * W / self.weight_parameters['delta'])
    W[adj.astype(bool)] = 0
    D = sp.zeros((s, s)).astype(float)
    for i in range(s):
        D[i, i] = sp.sum(W[i])
    return W, D
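For intuition, the same W/D construction on toy feature vectors, as a standalone sketch (hypothetical delta value; the adjacency masking is omitted):

import numpy as np
from scipy.spatial.distance import pdist, squareform

vect = np.array([[0.1], [0.2], [0.9]])  # one mean feature per superpixel
delta = 0.5                             # hypothetical kernel width
W = np.exp(-squareform(pdist(vect)) / delta)
D = np.diag(W.sum(axis=1))              # degree matrix, as built above
print(W.shape, D.shape)                 # (3, 3) (3, 3)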
def tstat(beta, var, sigma, q, N, log=False):
    """
    Calculates a t-statistic and associated p-value given the estimate of
    beta and its standard error. This is actually an F-test, but when only
    one hypothesis is being performed, it reduces to a t-test.
    """
    ts = beta / np.sqrt(var * sigma)
    # sf == survival function - this is more accurate than 1 - cdf;
    # logsf is used when the precision of sf is not good enough.
    if log:
        # log(2 * sf) = log(2) + logsf
        ps = np.log(2.0) + stats.t.logsf(np.abs(ts), N - q)
    else:
        ps = 2.0 * stats.t.sf(np.abs(ts), N - q)
    if not len(ts) == 1 or not len(ps) == 1:
        raise Exception("Something bad happened :(")
    return ts.sum(), ps.sum()
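A quick numeric check (a sketch; np and stats are numpy and scipy.stats, matching the imports the snippet assumes):

import numpy as np
import scipy.stats as stats

beta = np.array([0.5])
var = np.array([0.04])  # var * sigma is the squared standard error
ts, ps = tstat(beta, var, sigma=1.0, q=1, N=100)
print(ts)               # 0.5 / 0.2 = 2.5
print(ps)               # two-sided p-value, about 0.014 with 99 df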
def model_error(f, x, y):
    return sp.sum((f(x) - y) ** 2)
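model_error sums the squared residuals of a fitted model f over the data; for instance, with a polynomial fit (a sketch; sp is the same scipy/NumPy alias used above):

import numpy as np

x = np.arange(10, dtype=float)
y = 3.0 * x + 1.0 + np.random.randn(10) * 0.1
f = np.poly1d(np.polyfit(x, y, 1))  # fit a line to noisy data
print(model_error(f, x, y))         # small residual sum of squares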
# Function to import tables with the serial number of the day
# and Adjusted Closing Prices
# Source: SLIC_new_cityscapes_training_server_1.py, project SLIC_cityscapes (author: wpqmanu)
def calcdistance_mat(self, points, center, spatialmax):
    ## -- L2 norm, optimized -- ##
    # Distance of every pixel to a SLIC cluster center: squared spatial
    # distance plus a 0/1 color-mismatch term.
    center = scipy.array(center)
    location_center = center[:2]
    color_center = center[2:]
    location_points = points[:, :, :2]
    color_points = points[:, :, 2:]
    difs_location = location_points - location_center
    difs_color = 1 - np.equal(color_points, color_center)
    if len(difs_color.shape) == 2:
        difs_color = np.expand_dims(difs_color, axis=2)
    difs = np.concatenate((difs_location, difs_color), axis=2)
    norm = (difs ** 2).astype(float)
    norm[:, :, 0:2] *= float(self.MM) / (spatialmax * spatialmax)  # weight of the location term relative to the color term
    norm = scipy.sum(norm, 2)
    return norm
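A shape-level sketch of a call (the _Slic wrapper and the MM value are hypothetical; MM plays the role of the SLIC compactness weight):

import numpy as np

class _Slic(object):
    MM = 10.0  # hypothetical location-vs-color weight
    calcdistance_mat = calcdistance_mat

points = np.zeros((4, 5, 3))  # H x W x (row, col, label)
points[:, :, 0], points[:, :, 1] = np.indices((4, 5))
center = [2.0, 2.0, 0.0]      # (row, col, label) of one cluster center
d = _Slic().calcdistance_mat(points, center, spatialmax=5)
print(d.shape)                # (4, 5) distance map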
def _parse_plink_snps_(genotype_file, snp_indices):
    plinkf = plinkfile.PlinkFile(genotype_file)
    samples = plinkf.get_samples()
    num_individs = len(samples)
    num_snps = len(snp_indices)
    raw_snps = sp.empty((num_snps, num_individs), dtype='int8')
    # If these indices are not in order, then we place them in the right
    # place while parsing the SNPs.
    snp_order = sp.argsort(snp_indices)
    ordered_snp_indices = list(snp_indices[snp_order])
    ordered_snp_indices.reverse()
    print('Iterating over file to load SNPs')
    snp_i = 0
    next_i = ordered_snp_indices.pop()
    line_i = 0
    max_i = ordered_snp_indices[0]
    while line_i <= max_i:
        if line_i < next_i:
            plinkf.next()
        elif line_i == next_i:
            line = plinkf.next()
            snp = sp.array(line, dtype='int8')
            # Replace missing calls (coded 3) by the most common genotype.
            bin_counts = line.allele_counts()
            if bin_counts[-1] > 0:
                mode_v = sp.argmax(bin_counts[:2])
                snp[snp == 3] = mode_v
            s_i = snp_order[snp_i]
            raw_snps[s_i] = snp
            if line_i < max_i:
                next_i = ordered_snp_indices.pop()
            snp_i += 1
        line_i += 1
    plinkf.close()
    assert snp_i == len(raw_snps), 'Failed to parse SNPs?'
    num_indivs = len(raw_snps[0])
    freqs = sp.sum(raw_snps, 1, dtype='float32') / (2 * float(num_indivs))
    return raw_snps, freqs
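A hypothetical call ('genotypes' is an illustrative PLINK file prefix, i.e. genotypes.bed/.bim/.fam; the snippet's plinkfile module is presumably plinkio's, but that import path is an assumption):

import numpy as np
from plinkio import plinkfile  # assumption: the plinkio package

snp_indices = np.array([10, 3, 42])  # rows of the .bed file to load
raw_snps, freqs = _parse_plink_snps_('genotypes', snp_indices)
print(raw_snps.shape)  # (3, num_individuals), rows ordered as snp_indices
print(freqs)           # per-SNP allele frequencies in [0, 1]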
def get_ld_tables(snps, ld_radius=100, ld_window_size=0):
    """
    Calculates LD tables, and the LD score in one go...
    """
    ld_dict = {}
    m, n = snps.shape
    print(m, n)
    ld_scores = sp.ones(m)
    ret_dict = {}
    for snp_i, snp in enumerate(snps):
        # Calculate D
        start_i = max(0, snp_i - ld_radius)
        stop_i = min(m, snp_i + ld_radius + 1)
        X = snps[start_i: stop_i]
        D_i = sp.dot(snp, X.T) / n
        r2s = D_i ** 2
        ld_dict[snp_i] = D_i
        # Unbiased LD score estimate: r^2 - (1 - r^2)/(n - 2)
        lds_i = sp.sum(r2s - (1 - r2s) / (n - 2), dtype='float32')
        # lds_i = sp.sum(r2s - (1 - r2s)*empirical_null_r2)
        ld_scores[snp_i] = lds_i
    ret_dict['ld_dict'] = ld_dict
    ret_dict['ld_scores'] = ld_scores
    if ld_window_size > 0:
        ref_ld_matrices = []
        for i, wi in enumerate(range(0, m, ld_window_size)):
            start_i = wi
            stop_i = min(m, wi + ld_window_size)
            curr_window_size = stop_i - start_i
            X = snps[start_i: stop_i]
            D = sp.dot(X, X.T) / n
            ref_ld_matrices.append(D)
        ret_dict['ref_ld_matrices'] = ref_ld_matrices
    return ret_dict
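A sketch on standardized random genotypes (rows are assumed mean-zero and variance-one, so sp.dot(snp, X.T)/n above is a correlation):

import numpy as np

m, n = 50, 200
snps = np.random.randn(m, n)
snps = (snps - snps.mean(1, keepdims=True)) / snps.std(1, keepdims=True)
tables = get_ld_tables(snps, ld_radius=10, ld_window_size=25)
print(tables['ld_scores'].shape)       # (50,)
print(len(tables['ref_ld_matrices']))  # 2 windows of 25 SNPs each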
def predict(self, xt, tau=None, proba=None):
    '''
    Function that predicts the labels for samples xt using the learned model
    Inputs:
        xt: the samples to be classified
    Outputs:
        y: the class
        K: the decision value for each class
    '''
    ## Get information from the data
    nt = xt.shape[0]      # Number of testing samples
    C = self.ni.shape[0]  # Number of classes
    ## Initialization
    K = sp.empty((nt, C))
    if tau is None:
        TAU = self.tau
    else:
        TAU = tau
    for c in range(C):
        invCov, logdet = self.compute_inverse_logdet(c, TAU)
        cst = logdet - 2 * sp.log(self.prop[c])  # Pre-compute the constant term
        xtc = xt - self.mean[c, :]
        temp = sp.dot(invCov, xtc.T).T
        K[:, c] = sp.sum(xtc * temp, axis=1) + cst
        del temp, xtc
    ## Assign the label saved in classnum to the minimum value of K
    yp = self.classnum[sp.argmin(K, 1)]
    if proba is None:
        return yp
    else:
        return yp, K
def compute_inverse_logdet(self, c, tau):
    # Invert the covariance of class c through its eigendecomposition:
    # inv(Cov) = Q diag(1/(L + tau)) Q.T, with tau a regularization term.
    Lr = self.L[c, :] + tau  # Regularized eigenvalues
    temp = self.Q[c, :, :] * (1 / Lr)
    invCov = sp.dot(temp, self.Q[c, :, :].T)  # Pre-compute the inverse
    logdet = sp.sum(sp.log(Lr))  # Compute the log determinant
    return invCov, logdet
def BIC(self, x, y, tau=None):
    '''
    Computes the Bayesian Information Criterion of the model
    '''
    ## Get information from the data
    C, d = self.mean.shape
    n = x.shape[0]
    ## Initialization
    if tau is None:
        TAU = self.tau
    else:
        TAU = tau
    ## Penalization: d mean values and d(d+1)/2 covariance terms per class,
    ## plus C-1 free class proportions
    P = C * (d * (d + 3) / 2) + (C - 1)
    P *= sp.log(n)
    ## Compute the log-likelihood
    L = 0
    for c in range(C):
        j = sp.where(y == (c + 1))[0]
        xi = x[j, :]
        invCov, logdet = self.compute_inverse_logdet(c, TAU)
        cst = logdet - 2 * sp.log(self.prop[c])  # Pre-compute the constant term
        xi -= self.mean[c, :]
        temp = sp.dot(invCov, xi.T).T
        # K is, up to an additive constant, -2 times the class-c log-likelihood
        K = sp.sum(xi * temp, axis=1) + cst
        L += sp.sum(K)
        del K, xi
    return L + P
def error(f, x, y):
    return sp.sum((f(x) - y) ** 2)
def compare_images(img1, img2):
    # Normalize to compensate for exposure difference; this may be
    # unnecessary - consider disabling it.
    # (normalize, sum and norm come from the surrounding module; in the
    # original recipe, sum is scipy's and norm is scipy.linalg.norm.)
    img1 = normalize(img1)
    img2 = normalize(img2)
    # Calculate the difference and its norms
    diff = img1 - img2              # elementwise for numpy/scipy arrays
    m_norm = sum(abs(diff))         # Manhattan norm
    z_norm = norm(diff.ravel(), 0)  # Zero "norm": number of non-zero pixels
    return (m_norm, z_norm)
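A toy run (a sketch: normalize gets a trivial stand-in so the snippet is self-contained, and sum/norm are imported the way the original recipe does):

import numpy as np
from numpy import sum          # the recipe imports scipy's sum, an alias of this
from scipy.linalg import norm  # zero "norm" via norm(..., 0)

def normalize(img):
    # Trivial stand-in: rescale to [0, 1].
    rng = img.max() - img.min()
    return (img - img.min()) / rng if rng else img * 0.0

img1 = np.array([[0.0, 0.5], [0.5, 1.0]])
img2 = np.array([[0.0, 0.5], [0.6, 1.0]])
m_norm, z_norm = compare_images(img1, img2)
print(m_norm, z_norm)  # -> 0.1 1.0: total absolute difference, one differing pixel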
def geom_std(values: t.List[float]) -> float:
    """
    Calculates the geometric standard deviation for the passed values.
    Source: https://en.wikipedia.org/wiki/Geometric_standard_deviation
    """
    import scipy.stats as stats
    import numpy as np  # modern replacement: scipy no longer re-exports exp/sqrt/log
    gmean = stats.gmean(values)
    return np.exp(np.sqrt(np.sum([np.log(x / gmean) ** 2 for x in values]) / len(values)))
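For example, for the values 1, 2 and 4 the geometric mean is 2, so the geometric standard deviation is exp(sqrt((ln^2(1/2) + 0 + ln^2(2)) / 3)) = 2**sqrt(2/3):

import typing as t  # the annotation above assumes this alias

print(geom_std([1.0, 2.0, 4.0]))  # -> about 1.76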
def wtmean(mat, weights):
    # Weighted column means of mat, ignoring non-finite entries.
    if len(weights.shape) == 1:
        weights = scipy.reshape(weights, [scipy.size(weights), 1])
    wmat = scipy.isfinite(mat) * weights
    mat[scipy.isnan(mat)] = 0
    swmat = mat * wmat
    tf = weights != 0
    tf = tf[:, 0]
    y = scipy.sum(swmat[tf, :], axis=0) / scipy.sum(wmat, axis=0)
    return y
# end of wtmean
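A toy check (same legacy scipy alias assumption as elsewhere on this page; note how the NaN cell drops out of its column mean):

import numpy as np

mat = np.array([[1.0, 2.0],
                [3.0, np.nan],
                [5.0, 6.0]])
w = np.array([1.0, 1.0, 2.0])
print(wtmean(mat, w))  # -> [ 3.5  4.6667]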