def get_dist_func(name):
    """
    Return the distance function registered under ``name``.

    Valid names:
        Euclidean
        Pearson

    Returns a callable taking two arguments ``(x, y)``.
    Raises ValueError for any other name.
    """
    if name == 'Euclidean':
        # Use the C extension when the module-level flag says it is enabled,
        # otherwise fall back to ``euc``.
        if EUC_C_EXT_ENABLED:
            return euclidean.euclidean
        return euc
    if name == 'Pearson':
        # FIXME: Until a dedicated C extension exists this is as good as it
        # gets, and it is slow.
        # corrcoef returns the 2x2 correlation matrix; entry [0][1] is r.
        # 1 - r maps r = 1 to distance 0 (close) and r = -1 to distance 2
        # (distant).
        return lambda x, y: 1 - numpy.corrcoef(x, y)[0][1]
    # Call form of ``raise`` is valid on both Python 2 and Python 3.
    raise ValueError('No distance function named: %s' % name)
python类corrcoef()的实例源码
def _init_coefs(X, method='corrcoef'):
if method == 'corrcoef':
return np.corrcoef(X, rowvar=False), 1.0
elif method == 'cov':
init_cov = np.cov(X, rowvar=False)
return init_cov, np.max(np.abs(np.triu(init_cov)))
elif method == 'spearman':
return spearman_correlation(X, rowvar=False), 1.0
elif method == 'kendalltau':
return kendalltau_correlation(X, rowvar=False), 1.0
elif callable(method):
return method(X)
else:
raise ValueError(
("initialize_method must be 'corrcoef' or 'cov', "
"passed \'{}\' .".format(method))
)
def test_2d_w_missing(self):
    """Check the masked ``corrcoef`` on a 2D variable with a missing value."""
    data = self.data
    data[-1] = masked  # assigns into the object held by self.data
    grid = data.reshape(3, 4)
    actual = corrcoef(grid)
    # Only the block excluding the last row/column is compared
    # (presumably because the masked entry ends up in the last row).
    expected = np.corrcoef(grid)[:-1, :-1]
    assert_almost_equal(actual[:-1, :-1], expected)
    with catch_warn_mae():
        warnings.simplefilter("ignore")
        # ddof and bias have no or negligible effect on the function
        for extra in ({'ddof': -2}, {'ddof': 3}, {'bias': 1}):
            assert_almost_equal(corrcoef(grid, **extra)[:-1, :-1], expected)
def test_individual_stability_matrix():
    """
    Tests individual_stability_matrix method on three gaussian blobs.

    NOTE: no comparison against the stored reference has been written yet,
    so the test ends with an unconditional failure.
    """
    import utils
    import numpy as np
    import scipy as sp

    reference_path = home + '/git_repo/PyBASC/tests/ism_test.npy'
    desired = np.load(reference_path)
    blob_data = generate_blobs()
    ism = utils.individual_stability_matrix(blob_data, 20, 3)

    # TODO: decide how to compare ``ism`` with ``desired``.  Ideas noted so far:
    #   np.corrcoef(ism.flatten(), desired.flatten())
    #   np.testing.assert_equal(ism, desired)
    #   np.array(sp.spatial.distance.cdist(ism, desired, metric='correlation'))
    assert False
def plot_trace(n=0, lg=False):
    """Plot truth, estimate and data for trace ``n`` on the current axes.

    Reads the module-level names ``trueC``, ``solution``, ``y``, ``col``,
    ``g`` and ``trueSpikes`` and styles the axes with ``simpleaxis``.

    Parameters
    ----------
    n : int
        Index into ``trueC`` and ``trueSpikes``.
    lg : bool
        When true, a legend is drawn.
    """
    plt.plot(trueC[n], c=col[2], clip_on=False, zorder=5, label='Truth')
    plt.plot(solution, c=col[0], clip_on=False, zorder=7, label='Estimate')
    plt.plot(y, c=col[7], alpha=.7, lw=1, clip_on=False, zorder=-10, label='Data')
    if lg:
        plt.legend(frameon=False, ncol=3, loc=(.1, .62), columnspacing=.8)
    # Spike estimate: solution[t] - g * solution[t - 1], with a leading 0 so
    # the result has the same length as ``solution``.
    spks = np.append(0, solution[1:] - g * solution[:-1])
    plt.text(800, 2.2, 'Correlation: %.3f' % (np.corrcoef(trueSpikes[n], spks)[0, 1]), size=24)
    plt.gca().set_xticklabels([])
    simpleaxis(plt.gca())
    plt.ylim(0, 2.85)
    plt.xlim(0, 1500)
    plt.yticks([0, 2], [0, 2])
    # One (empty) label per tick: a label list shorter than the tick list is
    # rejected by current matplotlib releases.
    xtick_positions = [300, 600, 900, 1200]
    plt.xticks(xtick_positions, [''] * len(xtick_positions))
# init params
def pred_accuracy(y_true, y_pred):
    """Score predictions with AUC (two-valued trait) or correlation.

    Parameters
    ----------
    y_true : array-like
        Observed values.  Copied before use, so the caller's data is not
        modified.
    y_pred : array-like
        Predicted values.

    Returns
    -------
    float
        Area under the ROC curve when ``y_true`` holds exactly two distinct
        values, otherwise the correlation coefficient between ``y_true`` and
        ``y_pred``.
    """
    # Local import: the NumPy aliases in the top-level ``scipy`` namespace
    # (sp.copy, sp.unique, sp.corrcoef) are deprecated, so call NumPy directly.
    import numpy as np

    y_true = np.copy(y_true)
    if len(np.unique(y_true)) == 2:
        print('dichotomous trait, calculating AUC')
        y_min = y_true.min()
        y_max = y_true.max()
        if y_min != 0 or y_max != 1:
            # Compute the mask before overwriting anything.  Relabelling the
            # minimum to 0 first and then matching on ``y_max`` would merge
            # both classes whenever ``y_max`` is itself 0 (e.g. labels -1/0).
            is_high = y_true == y_max
            y_true[~is_high] = 0
            y_true[is_high] = 1
        fpr, tpr, _ = metrics.roc_curve(y_true, y_pred)
        return metrics.auc(fpr, tpr)
    print('continuous trait, calculating COR')
    return np.corrcoef(y_true, y_pred)[0, 1]
def calculate_residual_correlation_matrix(returns):
    """Correlation matrix of what is left after removing one common factor.

    Steps, as implemented:
      1. compute the covariance matrix of the columns of ``returns``;
      2. fit a one-component PCA on that covariance matrix and project the
         matrix onto it;
      3. regress every column of the covariance matrix on that component and
         keep the residuals;
      4. return the correlation matrix of the residuals.

    Parameters
    ----------
    returns : pandas.DataFrame
        One column per company (the columns label the output).

    Returns
    -------
    pandas.DataFrame
        Square frame indexed and labelled by ``returns.columns``.
    """
    # ``DataFrame.as_matrix()`` no longer exists in current pandas;
    # ``.values`` yields the same ndarray on old and new versions.
    returns_matrix = returns.values.transpose()
    covar_matrix = np.cov(returns_matrix)

    pca = decomposition.PCA(n_components=1)
    pca.fit(covar_matrix)
    X = pca.transform(covar_matrix)

    dim = covar_matrix.shape[1]
    res = np.zeros(shape=(dim, dim))
    for x in range(dim):
        # A fresh model per column; its residuals are stored in column x.
        regr = linear_model.LinearRegression().fit(X, covar_matrix[:, x])
        res[:, x] = covar_matrix[:, x] - regr.predict(X)

    # NOTE(review): np.corrcoef treats ROWS as variables while the residuals
    # above are stored per COLUMN -- confirm this orientation is intended.
    res_corr = np.corrcoef(res)
    return pd.DataFrame(res_corr, index=returns.columns, columns=returns.columns)
all_correlations.py 文件源码
项目:Building-Machine-Learning-Systems-With-Python-Second-Edition
作者: PacktPublishing
项目源码
文件源码
阅读 28
收藏 0
点赞 0
评论 0
def all_correlations_fast_no_scipy(y, X):
    '''
    Cs = all_correlations_fast_no_scipy(y, X)

    Correlate ``y`` with every row of ``X`` in one vectorised pass, so that
    approximately

        Cs[i] = np.corrcoef(y, X[i])[0,1]

    The match is approximate because 1e-5 is added to each standard
    deviation to keep the division finite when a deviation is zero.
    '''
    X = np.asanyarray(X, float)
    y = np.asanyarray(y, float)
    count = float(len(y))

    # Standard deviations, padded against zeros.
    y_sd = y.std() + 1e-5
    row_sd = X.std(1) + 1e-5

    cross = np.dot(X, y)
    row_mean = X.mean(1)
    y_mean = y.mean()
    return (cross - row_mean * y_mean * count) / count / row_sd / y_sd
def test_learn_codes():
    """Test learning of codes.

    For every solver the reconstruction built from the estimated codes must
    correlate strongly with the simulated signal, and every estimated
    activation above ``thresh`` must sit at a position that is non-zero in
    the true codes.
    """
    thresh = 0.25
    X, ds, z = simulate_data(n_trials, n_times, n_times_atom, n_atoms)

    # Flat positions of the non-zero entries of the true codes; this does not
    # depend on the solver, so compute it once.
    idx = np.ravel_multi_index(z[0].nonzero(), z[0].shape)

    for solver in ('l_bfgs', 'ista', 'fista'):
        z_hat = update_z(X, ds, reg, n_times_atom, solver=solver,
                         solver_kwargs=dict(factr=1e11, max_iter=50))
        X_hat = construct_X(z_hat, ds)

        # [0, 1] is the cross-correlation between X and X_hat.  The diagonal
        # entry [1, 1] is always 1 and would make this check vacuous.
        assert_true(np.corrcoef(X.ravel(), X_hat.ravel())[0, 1] > 0.99)
        assert_true(np.max(X - X_hat) < 0.1)

        # Flat positions of the estimated activations above the threshold
        loc_x, loc_y = np.where(z_hat[0] > thresh)
        idx_hat = np.ravel_multi_index((loc_x, loc_y), z_hat[0].shape)

        # make sure that the positions are a subset of the positions
        # in the original z
        mask = np.in1d(idx_hat, idx)
        assert_equal(np.sum(mask), len(mask))
nanops.py 文件源码
项目:PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda
作者: SignalMedia
项目源码
文件源码
阅读 29
收藏 0
点赞 0
评论 0
def get_corr_func(method):
    """Return the two-argument correlation function named by ``method``.

    ``method`` is one of 'pearson', 'kendall' or 'spearman'; any other key
    raises KeyError.  scipy.stats is imported only when a rank-based method
    is requested.
    """
    needs_scipy = method == 'kendall' or method == 'spearman'
    if needs_scipy:
        from scipy.stats import kendalltau, spearmanr

    def _pearson(a, b):
        matrix = np.corrcoef(a, b)
        return matrix[0, 1]

    def _kendall(a, b):
        result = kendalltau(a, b)
        # kendalltau may hand back a tuple-like result or a bare statistic.
        return result[0] if isinstance(result, tuple) else result

    def _spearman(a, b):
        rho = spearmanr(a, b)[0]
        return rho

    return dict(pearson=_pearson, kendall=_kendall, spearman=_spearman)[method]
test_extras.py 文件源码
项目:PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda
作者: SignalMedia
项目源码
文件源码
阅读 26
收藏 0
点赞 0
评论 0
def test_2d_w_missing(self):
    """Check the masked ``corrcoef`` on a 2D variable with a missing value."""
    data = self.data
    data[-1] = masked  # assigns into the object held by self.data
    grid = data.reshape(3, 4)
    actual = corrcoef(grid)
    # Only the block excluding the last row/column is compared
    # (presumably because the masked entry ends up in the last row).
    expected = np.corrcoef(grid)[:-1, :-1]
    assert_almost_equal(actual[:-1, :-1], expected)
    with catch_warn_mae():
        warnings.simplefilter("ignore")
        # ddof and bias have no or negligible effect on the function
        for extra in ({'ddof': -2}, {'ddof': 3}, {'bias': 1}):
            assert_almost_equal(corrcoef(grid, **extra)[:-1, :-1], expected)
def test_2d_w_missing(self):
    """Check the masked ``corrcoef`` on a 2D variable with a missing value."""
    data = self.data
    data[-1] = masked  # assigns into the object held by self.data
    grid = data.reshape(3, 4)
    actual = corrcoef(grid)
    # Only the block excluding the last row/column is compared
    # (presumably because the masked entry ends up in the last row).
    expected = np.corrcoef(grid)[:-1, :-1]
    assert_almost_equal(actual[:-1, :-1], expected)
    with catch_warn_mae():
        warnings.simplefilter("ignore")
        # ddof and bias have no or negligible effect on the function
        for extra in ({'ddof': -2}, {'ddof': 3}, {'bias': 1}):
            assert_almost_equal(corrcoef(grid, **extra)[:-1, :-1], expected)
def correlation_valid(x, y):
    """Correlate ``x`` and ``y`` over the positions where neither is NaN.

    Returns the tuple ``(corr, valid_count, sd_x, sd_y)``:
      * no valid position: ``corr``, ``sd_x`` and ``sd_y`` are all NaN;
      * both valid slices constant: ``corr`` is 1.0;
      * exactly one valid slice constant: ``corr`` is 0.0;
      * otherwise ``corr`` is the off-diagonal entry of ``numpy.corrcoef``.
    """
    # A position is valid when x is not NaN AND y is not NaN.
    valid = numpy.logical_and(numpy.logical_not(numpy.isnan(x)),
                              numpy.logical_not(numpy.isnan(y)))
    valid_count = valid.sum()
    if valid_count == 0:
        nan = float('nan')
        return nan, valid_count, nan, nan

    x_ok = x[valid]
    y_ok = y[valid]
    sd_x = numpy.std(x_ok)
    sd_y = numpy.std(y_ok)
    x_flat = sd_x == 0
    y_flat = sd_y == 0
    if x_flat and y_flat:
        corr = 1.0
    elif x_flat or y_flat:
        corr = 0.0
    else:
        corr = numpy.corrcoef(x_ok, y_ok)[0, 1]
    return corr, valid_count, sd_x, sd_y
def correlation_valid(x, y):
    """Correlation of ``x`` and ``y`` restricted to NaN-free positions.

    The result is ``(corr, valid_count, sd_x, sd_y)``.  With no usable
    position the three floats are NaN.  When both restricted inputs have zero
    spread ``corr`` is 1.0, when only one has zero spread it is 0.0, and in
    every other case it comes from ``numpy.corrcoef``.
    """
    usable = numpy.logical_not(
        numpy.logical_or(numpy.isnan(x), numpy.isnan(y))
    )
    valid_count = usable.sum()

    if valid_count == 0:
        corr = sd_x = sd_y = float('nan')
        return corr, valid_count, sd_x, sd_y

    sd_x = numpy.std(x[usable])
    sd_y = numpy.std(y[usable])
    if sd_x == 0 and sd_y == 0:
        return 1.0, valid_count, sd_x, sd_y
    if sd_x == 0 or sd_y == 0:
        return 0.0, valid_count, sd_x, sd_y
    corr = numpy.corrcoef(x[usable], y[usable])[0, 1]
    return corr, valid_count, sd_x, sd_y
def findcorrelation(self, A, B, k):
    '''
    Build the k by k matrix of Pearson product-moment correlation
    coefficients between the first k columns of A and the first k columns
    of B.

    :param: A : first NMF solution matrix
    :param: B : second NMF solution matrix, of same dimensions as A
    :param: k : number of columns in each matrix A and B

    Return: numpy array of dimensions k by k, where array[a][b] is the
    correlation between column 'a' of A and column 'b' of B

    Usage:
        Called by instability()
    '''
    # Row-major order: the column of A varies slowest, so the flat list
    # reshapes directly into array[a][b].
    coefficients = [
        np.corrcoef(A[:, a], B[:, b])[0][1]
        for a in range(k)
        for b in range(k)
    ]
    return np.asarray(coefficients).reshape(k, k)
def test_2d_w_missing(self):
    """Check the masked ``corrcoef`` on a 2D variable with a missing value."""
    data = self.data
    data[-1] = masked  # assigns into the object held by self.data
    grid = data.reshape(3, 4)
    actual = corrcoef(grid)
    # Only the block excluding the last row/column is compared
    # (presumably because the masked entry ends up in the last row).
    expected = np.corrcoef(grid)[:-1, :-1]
    assert_almost_equal(actual[:-1, :-1], expected)
    with catch_warn_mae():
        warnings.simplefilter("ignore")
        # ddof and bias have no or negligible effect on the function
        for extra in ({'ddof': -2}, {'ddof': 3}, {'bias': 1}):
            assert_almost_equal(corrcoef(grid, **extra)[:-1, :-1], expected)
def th_corrcoef(x):
    """
    mimics np.corrcoef

    Treats every row of the 2D tensor ``x`` as one variable and every column
    as one observation, and returns the square matrix of correlation
    coefficients between the rows, clamped to [-1, 1].
    """
    # calculate covariance matrix of rows
    # keepdim=True keeps the reduced axis, so the per-row mean has shape
    # (rows, 1) and broadcasts across the columns.  Without it the mean is
    # 1-D and cannot be subtracted row-wise.
    mean_x = th.mean(x, 1, keepdim=True)
    xm = x - mean_x
    c = xm.mm(xm.t())
    c = c / (x.size(1) - 1)

    # normalize covariance matrix: divide entry [i, j] by stddev[j] and then
    # by stddev[i]
    d = th.diag(c)
    stddev = th.pow(d, 0.5)
    c = c / stddev.unsqueeze(0)
    c = c / stddev.unsqueeze(1)

    # clamp between -1 and 1 to absorb floating-point overshoot
    return th.clamp(c, -1.0, 1.0)
def visualize_housing_data(df):
    """Show a pair plot and an annotated correlation heat map.

    Both figures use the columns LSTAT, INDUS, NOX, RM and MEDV of ``df``
    and are displayed with ``plt.show()``; nothing is returned.
    """
    cols = ['LSTAT', 'INDUS', 'NOX', 'RM', 'MEDV']

    # Figure 1: pairwise scatter plots.
    # NOTE(review): newer seaborn releases appear to call this argument
    # ``height`` instead of ``size`` -- confirm against the installed version.
    sns.set(style='whitegrid', context='notebook')
    sns.pairplot(df[cols], size=2.5)
    plt.show()

    # Figure 2: correlation matrix (transposed so each column is a variable).
    correlation_matrix = np.corrcoef(df[cols].values.T)
    sns.set(font_scale=1.5)
    heatmap_options = dict(
        cbar=True,
        annot=True,
        square=True,
        fmt='.2f',
        annot_kws={'size': 15},
        yticklabels=cols,
        xticklabels=cols,
    )
    sns.heatmap(correlation_matrix, **heatmap_options)
    plt.show()
def test_2d_with_missing(self):
    """Check the masked ``corrcoef`` on a 2D variable with a missing value."""
    data = self.data
    data[-1] = masked  # assigns into the object held by self.data
    grid = data.reshape(3, 4)
    actual = corrcoef(grid)
    # Only the block excluding the last row/column is compared
    # (presumably because the masked entry ends up in the last row).
    expected = np.corrcoef(grid)[:-1, :-1]
    assert_almost_equal(actual[:-1, :-1], expected)
    with suppress_warnings() as sup:
        sup.filter(DeprecationWarning, "bias and ddof have no effect")
        # ddof and bias have no or negligible effect on the function
        for extra in ({'ddof': -2}, {'ddof': 3}, {'bias': 1}):
            assert_almost_equal(corrcoef(grid, **extra)[:-1, :-1], expected)
def test_compute_corr():
    """Test compute_corr on Anscombe's quartet style data.

    ``compute_corr`` must agree with ``np.corrcoef`` for every row of ``y``
    and must raise ValueError when the second argument is empty.
    """
    x = np.array([10, 8, 13, 9, 11, 14, 6, 4, 12, 7, 5])
    y = np.array([[8.04, 6.95, 7.58, 8.81, 8.33, 9.96,
                   7.24, 4.26, 10.84, 4.82, 5.68],
                  [9.14, 8.14, 8.74, 8.77, 9.26, 8.10,
                   6.13, 3.10, 9.13, 7.26, 4.74],
                  [7.46, 6.77, 12.74, 7.11, 7.81, 8.84,
                   6.08, 5.39, 8.15, 6.42, 5.73],
                  [8, 8, 8, 8, 8, 8, 8, 19, 8, 8, 8],
                  [6.58, 5.76, 7.71, 8.84, 8.47, 7.04,
                   5.25, 12.50, 5.56, 7.91, 6.89]])

    # compute_corr receives the series as columns, hence the transpose.
    actual = compute_corr(x, y.T)
    expected = np.array([np.corrcoef(x, series)[0, 1] for series in y])
    assert_allclose(actual, expected)

    assert_raises(ValueError, compute_corr, [1, 2], [])
def buildCorrelationEntries(self, name, gene, weight_db_logic, snps_by_rsid):
    """Build the correlation-matrix output entries for one gene.

    Looks up the gene's weights, gathers the related data through
    ``self.buildRelatedData``, correlates it with ``numpy.corrcoef`` and
    converts the matrix with ``self.buildMatrixOutputEntries``.

    Returns an empty list when no related rsids are found; raises NameError
    when related data exists but no entries could be built.
    """
    gene_weights = weight_db_logic.weights_by_gene[gene]
    gene_rsids = gene_weights.keys()

    # Gather as much data as we can work on.
    related_rsids, related_data = self.buildRelatedData(gene_rsids, snps_by_rsid, gene_weights)
    if not len(related_rsids):
        return []

    self.updateFoundCorrelation(gene, name)

    # Correlation matrix of the related SNPs' data.
    correlations = numpy.corrcoef(numpy.array(related_data))

    # Translate into sql entries.
    entries = self.buildMatrixOutputEntries(correlations, gene_rsids, related_rsids, snps_by_rsid)
    if len(entries) == 0:
        raise NameError("Couldn not build correlation entries for (%s,%s)" %(name,gene))
    return entries
testRegression.py 文件源码
项目:Machine_Learning_In_Action
作者: SunnyMarkLiu
项目源码
文件源码
阅读 41
收藏 0
点赞 0
评论 0
def testLocallyWeightedRegression():
    """Fit, plot and score locally weighted regression on ``datasets/ex0.txt``.

    Predicts a value for every sample, draws the raw points with the fitted
    curve on top, and prints the correlation matrix between predictions and
    targets.  Nothing is returned.
    """
    datasArr, valuessArr = loadDataSet('datasets/ex0.txt')
    m = np.shape(datasArr)[0]
    predictValues = np.zeros(m)
    for i in range(0, m):
        # 0.01 is presumably the kernel width of the local fit -- TODO confirm
        predictValues[i] = \
            locallyWeightedRegression(datasArr[i], datasArr, valuessArr, 0.01)

    # Scatter plot of the raw data: column 1 of the inputs against the targets
    xMat = np.matrix(datasArr)
    valueMat = np.matrix(valuessArr)
    plt.figure(figsize=(10, 10), facecolor="white")
    plt.subplot(111)
    plt.scatter(xMat[:, 1].flatten().A[0], valueMat.T.flatten().A[0])

    # Order the samples by column 1 before drawing the fitted curve
    sortedIndexs = xMat[:, 1].argsort(0)
    # print() with a single argument behaves the same on Python 2 and 3
    print("sortedIndexs:")
    print(sortedIndexs)
    sortedMat = xMat[sortedIndexs.flatten().A[0]]
    plt.plot(sortedMat[:, 1], predictValues[sortedIndexs], c='red', linewidth=2)
    plt.show()

    # Correlation between the predictions and the true values
    correlationCoefficients = np.corrcoef(predictValues, valueMat)
    # One pre-formatted string keeps the output identical on Python 2 and 3
    print("????? %s" % (correlationCoefficients,))
def rsq(self, tmin=None, tmax=None):
    """Correlation between observed and simulated series.

    Parameters
    ----------
    tmin, tmax : optional
        Forwarded unchanged to ``self.ml.simulate`` and
        ``self.ml.observations``.

    Returns
    -------
    The off-diagonal entry of the 2x2 correlation matrix.  Note that the
    coefficient is returned as is; it is not squared here.

    Notes
    -----
    For the calculation of this statistic the corrcoef method from numpy
    is used.

    >>> np.corrcoef(sim, obs)[0, 1]

    Please refer to the Numpy Docs:
    https://docs.scipy.org/doc/numpy/reference/generated/numpy.corrcoef.html#numpy.corrcoef

    """
    simulated = self.ml.simulate(tmin=tmin, tmax=tmax)
    observed = self.ml.observations(tmin=tmin, tmax=tmax)
    # Keep only the simulated values at the observation index so both series
    # cover the same points in time.
    aligned = simulated[observed.index]
    correlation_matrix = np.corrcoef(aligned, observed)
    return correlation_matrix[0, 1]
def PA(samples, variables, datasets=5000, percentile=95.0):
    """Eigenvalue thresholds from random data (parallel analysis).

    Draws ``datasets`` random standard-normal data sets, computes the
    eigenvalues of each correlation matrix in descending order, and returns a
    percentile of the eigenvalues at every rank.

    Parameters
    ----------
    samples : int
        Number of observations per simulated variable.
    variables : int
        Number of simulated variables (size of the correlation matrix).
    datasets : int, optional
        How many random data sets to simulate (default 5000).
    percentile : float, optional
        Percentile taken across the simulations (default 95.0).

    Returns
    -------
    numpy.ndarray
        ``variables`` values rounded to 4 decimals, one per eigenvalue rank.
    """
    eig_vals = []
    for _ in range(datasets):
        data = np.random.standard_normal((variables, samples))
        cor_ = np.corrcoef(data)
        # A correlation matrix is symmetric, so eigvalsh applies: it returns
        # real eigenvalues in ascending order; reverse for descending order.
        eig_vals.append(np.linalg.eigvalsh(cor_)[::-1])
    return np.round(np.percentile(eig_vals, percentile, axis=0), 4)
def PCAdo(block, name):
    """Print a principal-component summary of ``block`` and save a plot.

    Prints eigenvalues, explained variance, cumulative explained variance,
    the absolute loadings of the leading component, the eigenvalues adjusted
    by the thresholds from ``PA`` and the correlation matrix, then saves the
    adjusted-eigenvalue plot to ``'imgs/PCA' + name``.

    Parameters
    ----------
    block : pandas.DataFrame
        One column per variable (it is passed to ``pd.DataFrame.corr``).
    name : str
        Suffix of the saved image file name.
    """
    cor_ = np.corrcoef(block.T)

    # The correlation matrix is symmetric, so eigh applies and yields real
    # eigenpairs.  Sort values AND vectors together, largest first, so that
    # column 0 of the loadings really is the dominant component.
    eig_vals, eig_vecs = np.linalg.eigh(cor_)
    order = np.argsort(eig_vals)[::-1]
    eig_vals = eig_vals[order]
    eig_vecs = eig_vecs[:, order]

    tot = sum(eig_vals)
    var_exp = [(i / tot) * 100 for i in eig_vals]
    cum_var_exp = np.cumsum(var_exp)
    # Rounding can leave a near-zero eigenvalue slightly negative; clip so
    # the square root stays defined.
    loadings = eig_vecs * np.sqrt(np.clip(eig_vals, 0, None))

    print('Eigenvalues')
    print(eig_vals)
    print('Variance Explained')
    print(var_exp)
    print('Total Variance Explained')
    print(cum_var_exp)
    print('Loadings')
    print(abs(loadings[:, 0]))

    # Thresholds from random data with the same number of rows and columns.
    PAcorrect = PA(block.shape[0], block.shape[1])
    print('Parallel Analisys')
    pa = (eig_vals - (PAcorrect - 1))
    print(pa)
    print('Correlation Matrix')
    print(pd.DataFrame.corr(block))

    plt.plot(range(1, len(pa) + 1), pa, '-o')
    plt.grid(True)
    plt.xlabel('Fatores')
    plt.ylabel('Componentes')
    plt.savefig('imgs/PCA' + name, bbox_inches='tight')
    # Reset the figure and axes so the next call starts from a clean state.
    plt.clf()
    plt.cla()
    # plt.show()
def pearson_r(data_1, data_2):
    """Return the Pearson correlation coefficient of ``data_1`` and ``data_2``."""
    correlation_matrix = np.corrcoef(data_1, data_2)
    # The off-diagonal entry holds the coefficient between the two inputs.
    return correlation_matrix[0, 1]
def person_sim(cls, x, y):
    """Pearson similarity of ``x`` and ``y`` rescaled from [-1, 1] to [0, 1].

    A coefficient of -1 maps to 0.0, 0 maps to 0.5 and 1 maps to 1.0.
    """
    coefficient = np.corrcoef(x, y, rowvar=0)[0][1]
    return 0.5 + 0.5 * coefficient
def test_pearson_r(data):
    """Compare ``dcst.pearson_r`` with the reference and with ``np.corrcoef``.

    When either input is constant (within ``atol``) the result must be NaN;
    otherwise it must match both ``original.pearson_r`` and NumPy.
    """
    x, y = data
    degenerate = (np.allclose(x, x[0], atol=atol, equal_nan=True)
                  or np.allclose(y, y[0], atol=atol, equal_nan=True))
    if degenerate:
        assert np.isnan(dcst.pearson_r(x, y))
        return
    assert np.isclose(dcst.pearson_r(x, y), original.pearson_r(x, y))
    assert np.isclose(dcst.pearson_r(x, y), np.corrcoef(x, y)[0,1])
def pearson_r(x, y):
    """Compute Pearson correlation coefficient between two arrays."""
    # np.corrcoef yields the full correlation matrix; entry [0, 1] is the
    # coefficient between x and y.
    return np.corrcoef(x, y)[0, 1]
def transform_to_correlation_dist(data):
    """Absolute correlation matrix between the columns of ``data``.

    Only the magnitude of each correlation is kept (the sign is discarded)
    and NaN entries are replaced through ``np.nan_to_num``.
    """
    column_corr = np.corrcoef(data.T)
    return np.nan_to_num(np.abs(column_corr))