def pearson(y_true, y_pred):
    """
    Compute the Pearson product-moment correlation coefficient of ``y_true``
    against ``y_pred``.

    :param y_true: The true/actual/gold labels for the data.
    :type y_true: array-like of float
    :param y_pred: The predicted/observed labels for the data.
    :type y_pred: array-like of float

    :returns: the correlation coefficient reported by ``pearsonr``, or
              ``0.0`` when that coefficient is NaN (i.e. not well-defined)
    """
    coefficient = pearsonr(y_true, y_pred)[0]
    # A NaN coefficient is mapped to a neutral score instead of propagating.
    if np.isnan(coefficient):
        return 0.0
    return coefficient
python类pearsonr()的实例源码
def pearson_correlation(a,b,topics):
    """Return the Pearson correlation between ``a`` and ``b`` over ``topics``.

    Each input is first converted by ``fill_list_from_dict(value, topics)``;
    the two converted sequences are then passed to ``scipy.stats.pearsonr``.

    :param a: first input handed to ``fill_list_from_dict``
    :param b: second input handed to ``fill_list_from_dict``
    :param topics: forwarded unchanged to ``fill_list_from_dict`` for both inputs
    :returns: the first element (the coefficient) of the ``pearsonr`` result
    """
    from scipy.stats import pearsonr

    series_a = fill_list_from_dict(a, topics)
    series_b = fill_list_from_dict(b, topics)
    outcome = pearsonr(series_a, series_b)
    return outcome[0]
def test_phase_randomize():
    """Check that ``phase_randomize`` preserves amplitudes and decorrelates phases.

    A small 3-D array of summed sinusoids is built, passed through
    ``phase_randomize``, and the Fourier transforms (along axis 1) of the
    input and output are compared.
    """
    from brainiak.utils.utils import phase_randomize
    import numpy as np
    from scipy.fftpack import fft
    import math
    from scipy.stats import pearsonr

    # Generate auto-correlated signals
    num_v, num_t, num_s = 2, 100, 3
    signals = np.zeros((num_v, num_t, num_s))
    for v in range(num_v):
        for s in range(num_s):
            wave_v = np.sin(np.linspace(0, math.pi * 5 * (v + 1), num_t))
            wave_s = np.sin(np.linspace(0, math.pi * 6 * (s + 1), num_t))
            signals[v, :, s] = wave_v + wave_s

    spectrum = fft(signals, axis=1)
    randomized = phase_randomize(signals)
    spectrum_pr = fft(randomized, axis=1)

    original_phase = np.angle(spectrum).flatten()
    randomized_phase = np.angle(spectrum_pr).flatten()
    phase_corr = pearsonr(original_phase, randomized_phase)[0]

    amplitudes_match = np.isclose(abs(spectrum), abs(spectrum_pr)).all()
    assert amplitudes_match, \
        "Amplitude spectrum not preserved under phase randomization"
    assert abs(phase_corr) < 0.03, \
        "Phases still correlated after randomization"
def plot_pearson(name, sizes=(10, 7, 5, 4, 3, 2)):
    """Plot the Pearson coefficient of the neurons for each layer.

    For every layer, the pairwise Pearson correlation between the weight
    vectors of its neurons is averaged over all stored weight snapshots, and
    ``abs(mean) * sqrt(layer size)`` is drawn as a bar chart.

    Args:
        name: identifier forwarded to ``utils.get_data``; the returned
            mapping must provide a ``'weights'`` entry.
        sizes: number of neurons in each layer, one entry per bar.  Defaults
            to the values that were previously hard-coded.

    NOTE(review): the indentation of the original snippet was lost, so the
    loop nesting below (one mean per neuron, then one mean per layer) is the
    most plausible reading -- confirm against the upstream source.
    """
    data_array = utils.get_data(name)
    ws = data_array['weights']
    sizes = list(sizes)
    f = plt.figure(figsize=(12, 8))
    axes = f.add_subplot(111)
    # The last stored entry of every snapshot, computed once instead of once
    # per layer.
    snapshots = [np.squeeze(ws[k][0][0][-1]) for k in range(len(ws))]
    # The mean of pearson coeffs of all the layers
    pearson_mean = []
    # Go over all the layers
    for layer in range(len(sizes)):
        inner_pearson_mean = []
        # Go over all the weights in the layer
        for ws_current in snapshots:
            layer_weights = ws_current[layer]
            num_neurons = len(layer_weights)
            # Go over the neurons
            for neuron in range(num_neurons):
                # Correlate with the rest of the neurons
                person_t = [
                    sis.pearsonr(layer_weights[neuron], layer_weights[neuron_second])[0]
                    for neuron_second in range(neuron + 1, num_neurons)
                ]
                # The last neuron has no partner left; averaging an empty
                # list would yield NaN and poison the whole layer mean.
                if person_t:
                    inner_pearson_mean.append(np.mean(person_t))
        pearson_mean.append(np.mean(inner_pearson_mean))
    # Plot the coeff; bar positions follow the number of layers instead of a
    # fixed 1..6 range.
    positions = np.arange(1, len(sizes) + 1)
    axes.bar(positions, np.abs(np.array(pearson_mean))*np.sqrt(sizes), align='center')
    axes.set_xlabel('Layer')
    axes.set_ylabel('Abs(Pearson)*sqrt(N_i)')
    rects = axes.patches
    # Now make some labels
    labels = ["L%d (%d nuerons)" % (i, j) for i, j in zip(range(len(rects)), sizes)]
    plt.xticks(positions, labels)
def circ_corrcc(alpha, x):
    """Correlation coefficient between one circular and one linear random
    variable.

    Args:
        alpha: vector
            Sample of angles in radians
        x: vector
            Sample of linear random variable

    Returns:
        rho: float
            Correlation coefficient
        pval: float
            p-value

    Raises:
        ValueError: if ``alpha`` and ``x`` do not have the same length.

    Code taken from the Circular Statistics Toolbox for Matlab
    By Philipp Berens, 2009
    Python adaptation by Etienne Combrisson
    """
    # Compare lengths by value: an identity check (``is not``) on ints is
    # only reliable for small cached integers and wrongly rejects long inputs.
    if len(alpha) != len(x):
        raise ValueError('The length of alpha and x must be the same')
    n = len(alpha)

    sin_alpha = np.sin(alpha)
    cos_alpha = np.cos(alpha)

    # Compute correlation coefficient for sin and cos independently
    rxs = pearsonr(x, sin_alpha)[0]
    rxc = pearsonr(x, cos_alpha)[0]
    rcs = pearsonr(sin_alpha, cos_alpha)[0]

    # Compute angular-linear correlation (equ. 27.47)
    rho = np.sqrt((rxc**2 + rxs**2 - 2*rxc*rxs*rcs)/(1-rcs**2))

    # Compute pvalue
    pval = 1 - chi2.cdf(n*rho**2, 2)

    return rho, pval
def compare_distances(A,B,random_samples=None,s=200,pvalues=False):
    """Correlate the pairwise column distances of ``A`` with those of ``B``.

    A subset of columns is selected, the Euclidean distances between the
    selected columns are computed separately for ``A`` and ``B``, and the two
    distance vectors are compared with Pearson and Spearman correlation.

    Args:
        A: 2-D array; columns are the items whose distances are compared.
        B: 2-D array indexed with the same column selection as ``A``.
        random_samples: column selector applied to both arrays.  When it is
            ``None`` or empty, a random boolean mask selecting
            ``min(s, A.shape[1])`` columns is drawn with ``np.random``.
        s: number of columns to sample when no selector is supplied.
        pvalues: when true, return the full ``pearsonr`` / ``spearmanr``
            results instead of only the coefficients.

    Returns:
        ``(pearson, spearman)`` -- full result objects if ``pvalues`` is
        true, otherwise just the two coefficients.
    """
    if random_samples is None or len(random_samples) == 0:
        # Builtin ``bool`` replaces the ``np.bool`` alias, which is not
        # available in every NumPy release.
        random_samples = np.zeros(A.shape[1], dtype=bool)
        random_samples[:min(s, A.shape[1])] = True
        np.random.shuffle(random_samples)
    dist_x = distance.pdist(A[:, random_samples].T, 'euclidean')
    dist_y = distance.pdist(B[:, random_samples].T, 'euclidean')
    pear = pearsonr(dist_x, dist_y)
    spear = spearmanr(dist_x, dist_y)
    if pvalues:
        return pear, spear
    return pear[0], spear[0]
def sum_corr(view1,view2,flag=''):
    """Print the average Pearson correlation over paired rows of two views.

    ``view1`` and ``view2`` are iterated in lockstep; the coefficient of each
    pair (from ``measures.pearsonr``) is summed and divided by
    ``len(view1)``.  Nothing is returned; the result is only printed,
    tagged with ``flag``.
    """
    print("test correlation")
    total = sum(
        measures.pearsonr(row_a, row_b)[0]
        for row_a, row_b in zip(view1, view2)
    )
    average = total/len(view1)
    print('avg sum corr ::',flag,'::',average)
def cal_sim(model,ind1,ind2=1999):
    """Rank projected view-2 items against each projected view-1 item and
    print a mean-average-precision style score.

    The first ``ind1`` rows of ``test_v1.npy`` and the first ``ind2`` rows of
    ``test_v2.npy`` are projected with ``project(model, ...)`` (the other
    view is zero-filled).  For every projected view-1 row, all projected
    view-2 rows are ranked by their ``pearsonr`` result; among the top seven,
    every candidate whose index lies in ``[i, i + 5)`` adds ``1 / rank`` to
    that row's AP.  The per-row ranking and AP are printed, followed by the
    sum of APs divided by ``ind1``.
    """
    view1 = np.load("test_v1.npy")[0:ind1]
    view2 = np.load("test_v2.npy")[0:ind2]
    # Loaded but not used below; kept so a missing file still fails here.
    label1 = np.load('test_l.npy')
    x1 = project(model,[view1,np.zeros_like(view1)])
    x2 = project(model,[np.zeros_like(view2),view2])
    label2 = []
    MAP = 0
    for i, query in enumerate(x1):
        cor = [pearsonr(query.tolist(), candidate.tolist()) for candidate in x2]
        # Pair every correlation result with its candidate index and order
        # the pairs from best to worst.
        ranked = sorted((score, position) for position, score in enumerate(cor))
        ranked.reverse()
        label2.append(ranked[0:4])
        top_positions = [entry[1] for entry in ranked[0:7]]
        AP = 0
        for rank, position in enumerate(top_positions):
            if i <= position < i + 5:
                AP += 1/(rank+1)
        print(top_positions)
        print(AP)
        MAP += AP
    print('MAP is : ',MAP/ind1)
def pearson_scorer(estimator, X, y):
    """Score ``estimator`` by the Pearson correlation of its predictions.

    :param estimator: fitted object exposing ``predict``
    :param X: samples to predict on
    :param y: target values the predictions are compared against
    :returns: the result of ``pearsonr(predictions, y)``

    NOTE(review): the full ``pearsonr`` result (coefficient and p-value) is
    returned, as before; callers that need a single number must take
    element ``[0]`` -- confirm what the callers expect.
    """
    logging.info('predicting ...')
    # Predict from the features; the targets are only used for comparison.
    predicted = estimator.predict(X)
    return pearsonr(list(predicted), y)
def calc_correl(self, dev_pred, test_pred):
    """Correlate dev/test predictions with the stored original targets.

    Pearson, Spearman and Kendall-tau coefficients are computed for
    ``dev_pred`` against ``self.dev_y_org`` and for ``test_pred`` against
    ``self.test_y_org``.

    :returns: ``(dev_prs, test_prs, dev_spr, test_spr, dev_tau, test_tau)``
    """
    scores = []
    for metric in (pearsonr, spearmanr, kendalltau):
        # Each metric returns (coefficient, p-value); only the coefficient
        # is kept, dev first and test second.
        dev_coef, _ = metric(dev_pred, self.dev_y_org)
        test_coef, _ = metric(test_pred, self.test_y_org)
        scores.append(dev_coef)
        scores.append(test_coef)
    return tuple(scores)
def check_similarity_match(X_embed, S):
    """
    Since SimEcs are supposed to project the data into an embedding space where the target similarities
    can be linearly approximated, check if X_embed*X_embed^T = S
    (check mean squared error and Spearman correlation coefficient)

    Inputs:
        - X_embed: Nxd matrix with coordinates in the embedding space
        - S: NxN matrix with target similarities (do whatever transformations were done before using this
             as input to the SimEc, e.g. centering, etc.)
    Returns:
        - msqe, rho, r: mean squared error, Spearman and Pearson correlation coefficient between linear kernel of embedding
                        and target similarities (mean squared error is more exact, corrcoef a more relaxed error measure)
    """
    # to get results that are comparable across similarity measures, we have to normalize them somehow,
    # in this case by dividing by the absolute max value of the target similarity matrix
    n = np.max(np.abs(S))
    S_norm = S/n
    # compute linear kernel as approximated similarities; the division is
    # done out-of-place so that an integer-typed embedding does not fail on
    # an in-place true division
    S_approx = X_embed.dot(X_embed.T)/n
    # compute mean squared error
    msqe = np.mean((S_norm - S_approx) ** 2)
    # compute Spearman correlation coefficient
    rho = spearmanr(S_norm.flatten(), S_approx.flatten())[0]
    # compute Pearson correlation coefficient
    r = pearsonr(S_norm.flatten(), S_approx.flatten())[0]
    return msqe, rho, r
def compute_score(self, conf, hy):
    """Store regression quality metrics for predictions ``hy`` in ``conf``.

    Writes ``'_r2'``, ``'_spearmanr'`` and ``'_pearsonr'`` (each computed
    against ``self.test_y``) into ``conf`` and then copies the metric named
    by ``self.score`` into ``conf['_score']``.  ``conf`` is modified in
    place; nothing is returned.
    """
    conf['_r2'] = r2_score(self.test_y, hy)
    spearman_result = spearmanr(self.test_y, hy)
    conf['_spearmanr'] = spearman_result[0]
    pearson_result = pearsonr(self.test_y, hy)
    conf['_pearsonr'] = pearson_result[0]
    # The selected metric is looked up under its underscore-prefixed key.
    selected_key = '_' + self.score
    conf['_score'] = conf[selected_key]
generate_corr_data.py 文件源码
项目:tensorflow-cnn-time-series
作者: philipperemy
项目源码
文件源码
阅读 20
收藏 0
点赞 0
评论 0
def generate_two_correlated_time_series(size, rho):
    """Draw two Gaussian series of length ``size`` with target correlation ``rho``.

    Two independent standard-normal series are mixed with the Cholesky factor
    of the 2x2 matrix ``[[1, rho], [rho, 1]]``.

    :returns: ``(x, y, measured_rho)`` where ``measured_rho`` is the Pearson
              coefficient actually observed between the two drawn series
    """
    mixing = np.linalg.cholesky([[1.0, rho], [rho, 1.0]])
    white_noise = np.random.standard_normal((2, size))
    x, y = np.dot(mixing, white_noise)
    measured_rho = stats.pearsonr(x, y)[0]
    return x, y, measured_rho
demo_corr.py 文件源码
项目:Building-Machine-Learning-Systems-With-Python-Second-Edition
作者: PacktPublishing
项目源码
文件源码
阅读 24
收藏 0
点赞 0
评论 0
def _plot_correlation_func(x, y):
    """Scatter-plot ``y`` against ``x`` with a fitted straight line.

    The Pearson coefficient of the two samples is shown in the title and a
    degree-1 least-squares fit is drawn as a dashed red line on the current
    pylab axes.
    """
    # Local import: the polynomial helpers are taken from NumPy directly
    # because the ``scipy.polyfit`` / ``scipy.poly1d`` aliases are deprecated.
    import numpy as np

    r, _ = pearsonr(x, y)
    title = "Cor($X_1$, $X_2$) = %.3f" % r
    pylab.scatter(x, y)
    pylab.title(title)
    pylab.xlabel("$X_1$")
    pylab.ylabel("$X_2$")

    f1 = np.poly1d(np.polyfit(x, y, 1))
    pylab.plot(x, f1(x), "r--", linewidth=2)
    # pylab.xticks([w*7*24 for w in [0,1,2,3,4]], ['week %i'%(w+1) for w in
    # [0,1,2,3,4]])
def SubCorr_statistic(self,data_x=None,data_y=None):
    """Return the mean squared Pearson correlation between each column of
    ``data_x`` and ``data_y``.

    :param data_x: 2-D array (samples x dimensions); defaults to
                   ``self.data_x`` when ``None``
    :param data_y: samples of the second variable, given as a vector or a
                   single column; defaults to ``self.data_y`` when ``None``
    :returns: the sum of the squared per-column coefficients divided by the
              number of columns of ``data_x``
    """
    # Local import so the block does not depend on how NumPy names were
    # imported at file level.
    import numpy as np

    if data_x is None:
        data_x = self.data_x
    if data_y is None:
        data_y = self.data_y
    data_x = np.asarray(data_x)
    # ``pearsonr`` works on 1-D samples: flatten the target once and pass
    # each column as a 1-D slice rather than an (n, 1) array.
    y_flat = np.ravel(data_y)
    dx = np.shape(data_x)[1]
    stats_value = np.zeros(dx)
    for dd in range(dx):
        stats_value[dd] = pearsonr(data_x[:, dd], y_flat)[0]**2
    SubCorr = sum(stats_value)/float(dx)
    return SubCorr
test_analytics.py 文件源码
项目:PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda
作者: SignalMedia
项目源码
文件源码
阅读 34
收藏 0
点赞 0
评论 0
def test_corr(self):
    """Exercise ``corr`` on ``self.ts`` for full, partial and missing
    overlap, all-NaN data, and agreement with ``scipy.stats.pearsonr``."""
    tm._skip_if_no_scipy()

    import scipy.stats as stats

    series = self.ts

    # full overlap
    self.assertAlmostEqual(series.corr(series), 1)

    # partial overlap
    head = series[:15]
    tail = series[5:]
    self.assertAlmostEqual(head.corr(tail), 1)
    self.assertTrue(isnull(head.corr(tail, min_periods=12)))

    head_reindexed = head.reindex(series.index)
    tail_reindexed = tail.reindex(series.index)
    self.assertTrue(isnull(head_reindexed.corr(tail_reindexed, min_periods=12)))

    # No overlap
    evens = series[::2]
    odds = series[1::2]
    self.assertTrue(np.isnan(evens.corr(odds)))

    # all NA
    all_nan = series[:10].copy()
    all_nan[:] = np.nan
    self.assertTrue(isnull(all_nan.corr(all_nan)))

    # two freshly generated series must agree with scipy
    left = tm.makeTimeSeries()
    right = tm.makeTimeSeries()
    observed = left.corr(right)
    reference = stats.pearsonr(left, right)[0]
    self.assertAlmostEqual(observed, reference)
def de_ps(X,y):
    """Fit a Pearson-based ``SelectKBest`` selector and return a transformer.

    At most 2000 features (or all of them, if there are fewer) are kept,
    scored by the Pearson correlation of each feature column with ``y``.

    :param X: 2-D feature matrix used to fit the selector
    :param y: target values
    :returns: a function ``_func(X1, X2)`` that applies the fitted selector
              to both inputs and returns the two reduced matrices
    """
    dim = X.shape[1]
    de = min(2000, dim)

    def _pearson_scores(X, Y):
        # A list is built explicitly: on Python 3 ``map`` returns a lazy
        # iterator that ``np.array`` cannot turn into a numeric matrix.
        results = [pearsonr(column, Y) for column in X.T]
        scores = np.array([result[0] for result in results])
        pvalues = np.array([result[1] for result in results])
        # Returned as a (scores, pvalues) pair, one entry per feature.
        return scores, pvalues

    clf = SelectKBest(_pearson_scores, k=de)
    clf.fit(X, y)

    def _func(X1, X2):
        return clf.transform(X1), clf.transform(X2)

    return _func
def pearson(mat1, mat2):
    """Pearson correlation between two matrices, ignoring zeroes.

    Both matrices are flattened and only the positions where *both* entries
    are non-zero are correlated.

    :param mat1: array whose shape must equal that of ``mat2``
    :param mat2: array whose shape must equal that of ``mat1``
    :returns: the Pearson correlation coefficient of the retained entries
    """
    assert mat1.shape == mat2.shape
    # convert to vectors
    vec1 = mat1.flatten()
    vec2 = mat2.flatten()

    # remove zeroes: one boolean mask instead of a Python-level index loop
    keep = (vec1 != 0) & (vec2 != 0)

    r, _ = st.pearsonr(vec1[keep], vec2[keep])
    return r
def RSA(m1,m2):
    '''RSA analysis will compare the similarity of two matrices.

    The upper triangle of each matrix, including the diagonal, is masked
    out; the remaining lower-triangle values that are defined (non-NaN) in
    both matrices are correlated.

    :param m1: square matrix exposing ``.mask`` and ``.values``
               (presumably a pandas DataFrame -- confirm against callers)
    :param m2: second matrix of the same kind
    :returns: Pearson correlation coefficient of the shared defined entries
    '''
    from scipy.stats import pearsonr
    import numpy

    # Mask the upper triangle and the diagonal (they become NaN) and flatten
    # what is left to a vector.  Builtin ``bool`` replaces the ``numpy.bool``
    # alias, which is not available in every NumPy release.
    upper1 = numpy.triu(numpy.ones(m1.shape)).astype(bool)
    upper2 = numpy.triu(numpy.ones(m2.shape)).astype(bool)
    vectorm1 = numpy.array(m1.mask(upper1).values.flatten(), dtype=float)
    vectorm2 = numpy.array(m2.mask(upper2).values.flatten(), dtype=float)

    # Keep only the positions that are defined in both vectors
    defined = ~numpy.isnan(vectorm1) & ~numpy.isnan(vectorm2)
    return pearsonr(vectorm1[defined], vectorm2[defined])[0]
def forward(self, bottom, top):
    """Compute the SROCC and LCC and output them to top.

    ``bottom[0].data`` holds the predictions and ``bottom[1].data`` the
    labels; each is reshaped to a vector of length ``shape[0]``.  The
    Spearman coefficient is written to ``top[0].data`` and the Pearson
    coefficient to ``top[1].data``.
    """
    def _as_vector(blob):
        values = blob.data
        return np.reshape(values, values.shape[0])

    predictions = _as_vector(bottom[0])
    labels = _as_vector(bottom[1])

    srocc = stats.spearmanr(predictions, labels)[0]
    top[0].data[...] = srocc
    lcc = stats.pearsonr(predictions, labels)[0]
    top[1].data[...] = lcc