def _generate_sample_from_state(self, state, random_state=None):
    """Draw one sample from the Gaussian mixture stored for ``state``.

    A mixture component is chosen with probabilities ``self.weights_[state]``
    and the sample is then produced by ``sample_gaussian`` for that
    component.

    Parameters
    ----------
    state
        Index into ``self.means_``, ``self.covars_`` and ``self.weights_``.
    random_state
        Passed through ``check_random_state``; ``self.random_state`` is used
        when this is ``None``.

    Returns
    -------
    The value returned by ``sample_gaussian`` for the chosen component.
    """
    rng = check_random_state(
        self.random_state if random_state is None else random_state)

    state_means = self.means_[state]
    state_covs = self.covars_[state]
    state_weights = self.weights_[state]

    # Pick which mixture component generates this sample.
    component = rng.choice(self.n_mix, p=state_weights)

    # With 'tied' covariance the per-state entry is used as is; every other
    # covariance type is indexed by component.
    if self.covariance_type == 'tied':
        component_cov = state_covs
    else:
        component_cov = state_covs[component]

    return sample_gaussian(state_means[component], component_cov,
                           self.covariance_type, random_state=rng)
python类cov()的实例源码
def calculate_beta(self):
    r"""
    Return the beta of the algorithm returns against the benchmark returns.

    .. math::

        \beta_a = \frac{\mathrm{Cov}(r_a,r_p)}{\mathrm{Var}(r_p)}

    http://en.wikipedia.org/wiki/Beta_(finance)

    Reads ``self.algorithm_returns`` and ``self.benchmark_returns``.

    :return: ``0.0`` when fewer than two algorithm returns are available,
        otherwise the covariance of the two series divided by the variance
        of the benchmark series (both taken from ``np.cov`` with ``ddof=1``).
    """
    # it doesn't make much sense to calculate beta for less than two
    # values, so return 0.0.
    if len(self.algorithm_returns) < 2:
        return 0.0

    # Row 0 holds the algorithm returns, row 1 the benchmark returns.
    returns_matrix = np.vstack([self.algorithm_returns,
                                self.benchmark_returns])
    C = np.cov(returns_matrix, ddof=1)
    algorithm_covariance = C[0, 1]
    benchmark_variance = C[1, 1]
    return algorithm_covariance / benchmark_variance
def test_trace_sqrt_product_value(self):
    """Check `trace_sqrt_product` against the reference computation."""
    np.random.seed(0)

    # Use more examples than features so that scipy's sqrtm function does
    # not return a complex matrix.
    num_examples, num_features = 512, 256
    real_pool = np.float32(np.random.randn(num_examples, num_features))
    gen_pool = np.float32(np.random.randn(num_examples, num_features))

    sigma_real = np.cov(real_pool, rowvar=False)
    sigma_gen = np.cov(gen_pool, rowvar=False)

    tsp_op = _run_with_mock(gan_metrics.trace_sqrt_product,
                            sigma_real, sigma_gen)

    with self.test_session() as sess:
        # tsp: trace_sqrt_product
        actual = sess.run(tsp_op)

    expected = _expected_trace_sqrt_product(sigma_real, sigma_gen)
    self.assertAllClose(actual, expected, 0.01)
def calculate_residual_correlation_matrix(returns):
    """Return the correlation matrix of residuals left after a one-factor fit.

    Steps performed:

    1. Compute the covariance matrix of the columns of ``returns``.
    2. Fit a one-component PCA on that covariance matrix and project the
       matrix onto the component.
    3. Regress every column of the covariance matrix on the projection and
       keep the residuals.
    4. Return the correlation coefficients of the residual matrix.

    :param returns: pandas DataFrame; its columns label the output.
        (presumably one column per stock -- confirm with the caller)
    :return: DataFrame indexed and labelled by ``returns.columns``.
    """
    # `.values` is the supported replacement for DataFrame.as_matrix(),
    # which was removed in pandas 1.0.
    returns_matrix = returns.values.transpose()
    covar_matrix = np.cov(returns_matrix)

    # First principal component of the covariance matrix.
    pca = decomposition.PCA(n_components=1)
    pca.fit(covar_matrix)
    X = pca.transform(covar_matrix)

    dim = covar_matrix.shape[1]
    res = np.zeros(shape=(dim, dim))
    for x in range(dim):
        # A fresh model per column; the residual is what the single
        # component cannot explain.
        regr = linear_model.LinearRegression().fit(X, covar_matrix[:, x])
        res[:, x] = covar_matrix[:, x] - regr.predict(X)

    res_corr = np.corrcoef(res)
    return pd.DataFrame(res_corr, index=returns.columns,
                        columns=returns.columns)
def create_zca(imgs, filter_bias=0.1):
    """Build a whitening function from the statistics of ``imgs``.

    The covariance of the columns of ``imgs`` is regularised by adding
    ``filter_bias`` on the diagonal, and its inverse matrix square root is
    formed from an eigendecomposition.

    :param imgs: 2-D array whose rows are samples (it is averaged over
        axis 0 and its transpose is passed to ``np.cov``).
    :param filter_bias: value added to the diagonal of the covariance.
    :return: function ``transform(images)`` that subtracts the mean of
        ``imgs`` and multiplies by the whitening matrix.
    """
    mean_vec = np.mean(imgs, axis=0)
    cov_mat = np.cov(imgs.T)

    n_rows, n_cols = cov_mat.shape[0], cov_mat.shape[1]
    regularised = cov_mat + filter_bias * np.eye(n_rows, n_cols)
    eigvals, eigvecs = np.linalg.eigh(regularised)

    # Sanity checks on the decomposition before inverting the eigenvalues.
    assert not np.isnan(eigvals).any()
    assert not np.isnan(eigvecs).any()
    assert eigvals.min() > 0

    inv_sqrt = eigvals ** -.5
    whitener = np.dot(eigvecs, np.dot(np.diag(inv_sqrt), eigvecs.T))

    def transform(images):
        return np.dot(images - mean_vec, whitener)

    return transform
def testValueTensorIsIdempotent(self):
    """Evaluating the covariance value tensor repeatedly must not change it."""
    labels = tf.random_normal((10, 3), seed=2)
    predictions = labels * 0.5 + tf.random_normal((10, 3), seed=1) * 0.5
    cov, update_op = metrics.streaming_covariance(predictions, labels)

    with self.test_session() as sess:
        # `tf.initialize_local_variables` is deprecated in favour of
        # `tf.local_variables_initializer`.
        sess.run(tf.local_variables_initializer())

        # Run several updates.
        for _ in range(10):
            sess.run(update_op)

        # Then verify idempotency: reading the value must not update it.
        initial_cov = cov.eval()
        for _ in range(10):
            self.assertEqual(initial_cov, cov.eval())
def testSingleUpdateWithErrorAndWeights(self):
    """Weighted streaming Pearson correlation matches a NumPy reference.

    The reference is computed with ``np.cov`` on the arrays returned by
    ``_reweight(predictions, labels, weights)``.
    """
    with self.test_session() as sess:
        predictions = np.array([2, 4, 6, 8])
        labels = np.array([1, 3, 2, 7])
        weights = np.array([0, 1, 3, 1])
        predictions_t = tf.constant(predictions, shape=(1, 4),
                                    dtype=tf.float32)
        labels_t = tf.constant(labels, shape=(1, 4), dtype=tf.float32)
        weights_t = tf.constant(weights, shape=(1, 4), dtype=tf.float32)

        pearson_r, update_op = metrics.streaming_pearson_correlation(
            predictions_t, labels_t, weights=weights_t)

        p, l = _reweight(predictions, labels, weights)
        cmat = np.cov(p, l)
        expected_r = cmat[0, 1] / np.sqrt(cmat[0, 0] * cmat[1, 1])

        # `tf.initialize_local_variables` is deprecated in favour of
        # `tf.local_variables_initializer`.
        sess.run(tf.local_variables_initializer())
        self.assertAlmostEqual(expected_r, sess.run(update_op))
        self.assertAlmostEqual(expected_r, pearson_r.eval())
def make_random_points(centers, num_points):
    """Generate random points around ``centers`` with Mahalanobis scores.

    Each point is a randomly chosen center plus a rounded Gaussian offset
    (standard normal scaled by 20). The score of a point is its Mahalanobis
    distance to the empirical mean of the points sharing its center, using
    the empirical covariance of those points.

    Uses the global ``np.random`` state; seed it for reproducible output.

    :param centers: 2-D array of shape (num_centers, num_dims).
    :param num_points: number of points to generate.
    :return: tuple ``(points, assignments, scores)`` where ``points`` has
        shape (num_points, num_dims), ``assignments`` holds the center index
        of every point and ``scores`` is a list of num_points distances.
    """
    num_centers, num_dims = centers.shape
    assignments = np.random.choice(num_centers, num_points)
    offsets = np.round(np.random.randn(num_points,
                                       num_dims).astype(np.float32) * 20)
    points = centers[assignments] + offsets

    # `range` instead of the Python-2-only `xrange`.
    means = [np.mean(points[assignments == center], axis=0)
             for center in range(num_centers)]
    covs = [np.cov(points[assignments == center].T)
            for center in range(num_centers)]

    # Invert each covariance once instead of once per point. Only centers
    # that actually received points are inverted, exactly as before.
    inv_covs = {center: np.linalg.inv(covs[center])
                for center in set(assignments.tolist())}

    scores = []
    for point, center in zip(points, assignments.tolist()):
        delta = point - means[center]
        scores.append(np.sqrt(np.dot(np.dot(delta, inv_covs[center]),
                                     delta)))
    return (points, assignments, scores)
def testValueTensorIsIdempotent(self):
    """Evaluating the covariance value tensor repeatedly must not change it."""
    num_updates = 10
    num_reads = 10

    labels = tf.random_normal((10, 3), seed=2)
    predictions = labels * 0.5 + tf.random_normal((10, 3), seed=1) * 0.5
    cov_value, cov_update = metrics.streaming_covariance(predictions, labels)

    with self.test_session() as sess:
        sess.run(tf.local_variables_initializer())

        # Accumulate statistics over several updates.
        for _ in range(num_updates):
            sess.run(cov_update)

        # Reading the value tensor must leave it unchanged.
        reference = cov_value.eval()
        for _ in range(num_reads):
            self.assertEqual(reference, cov_value.eval())
def testSingleUpdateWithErrorAndWeights(self):
    """Weighted streaming Pearson correlation matches a NumPy reference.

    The reference is computed with ``np.cov`` on the arrays returned by
    ``_reweight(predictions, labels, weights)``.
    """
    with self.test_session() as sess:
        raw_predictions = np.array([2, 4, 6, 8])
        raw_labels = np.array([1, 3, 2, 7])
        raw_weights = np.array([0, 1, 3, 1])

        def as_row(values):
            # Every input is fed to the metric as a (1, 4) float32 constant.
            return tf.constant(values, shape=(1, 4), dtype=tf.float32)

        pearson_r, update_op = metrics.streaming_pearson_correlation(
            as_row(raw_predictions), as_row(raw_labels),
            weights=as_row(raw_weights))

        # NumPy reference on the reweighted samples.
        ref_p, ref_l = _reweight(raw_predictions, raw_labels, raw_weights)
        cmat = np.cov(ref_p, ref_l)
        expected_r = cmat[0, 1] / np.sqrt(cmat[0, 0] * cmat[1, 1])

        sess.run(tf.local_variables_initializer())
        self.assertAlmostEqual(expected_r, sess.run(update_op))
        self.assertAlmostEqual(expected_r, pearson_r.eval())
def make_random_points(centers, num_points):
    """Generate random points around ``centers`` with Mahalanobis scores.

    Each point is a randomly chosen center plus a rounded Gaussian offset
    (standard normal scaled by 20). The score of a point is its Mahalanobis
    distance to the empirical mean of the points sharing its center, using
    the empirical covariance of those points.

    Uses the global ``np.random`` state; seed it for reproducible output.

    :param centers: 2-D array of shape (num_centers, num_dims).
    :param num_points: number of points to generate.
    :return: tuple ``(points, assignments, scores)`` where ``points`` has
        shape (num_points, num_dims), ``assignments`` holds the center index
        of every point and ``scores`` is a list of num_points distances.
    """
    num_centers, num_dims = centers.shape
    assignments = np.random.choice(num_centers, num_points)
    offsets = np.round(np.random.randn(num_points,
                                       num_dims).astype(np.float32) * 20)
    points = centers[assignments] + offsets

    # `range` instead of the Python-2-only `xrange`.
    means = [np.mean(points[assignments == center], axis=0)
             for center in range(num_centers)]
    covs = [np.cov(points[assignments == center].T)
            for center in range(num_centers)]

    # Invert each covariance once instead of once per point. Only centers
    # that actually received points are inverted, exactly as before.
    inv_covs = {center: np.linalg.inv(covs[center])
                for center in set(assignments.tolist())}

    scores = []
    for point, center in zip(points, assignments.tolist()):
        delta = point - means[center]
        scores.append(np.sqrt(np.dot(np.dot(delta, inv_covs[center]),
                                     delta)))
    return (points, assignments, scores)
def test_covariance(self):
    """Compare `gmm_ops._covariance` (full and diagonal) with `np.cov`.

    Also logs the wall-clock time of the NumPy and TensorFlow computations.
    """
    numpy_started = time.time()
    transposed = self.data.T
    expected_cov = np.cov(transposed)
    logging.info('Numpy took %f', time.time() - numpy_started)

    tf_started = time.time()
    with self.test_session() as sess:
        # Second argument selects the diagonal variant when True.
        full_op = gmm_ops._covariance(
            tf.constant(transposed.T, dtype=tf.float32), False)
        diag_op = gmm_ops._covariance(
            tf.constant(transposed.T, dtype=tf.float32), True)
        tf.global_variables_initializer().run()

        full_cov = sess.run(full_op)
        np.testing.assert_array_almost_equal(expected_cov, full_cov)
        logging.info('Tensorflow took %f', time.time() - tf_started)

        diag_cov = sess.run(diag_op)
        np.testing.assert_array_almost_equal(
            np.diag(expected_cov), np.ravel(diag_cov), decimal=5)
nanops.py 文件源码
项目:PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda
作者: SignalMedia
项目源码
文件源码
阅读 35
收藏 0
点赞 0
评论 0
def nancov(a, b, min_periods=None):
    """Covariance of ``a`` and ``b`` over positions where both are non-null.

    :param a: first operand; must have the same length as ``b``.
    :param b: second operand.
    :param min_periods: minimum number of jointly valid observations
        required; ``None`` is treated as 1.
    :return: off-diagonal entry of ``np.cov(a, b)`` on the valid
        observations, or ``np.nan`` when fewer than ``min_periods`` remain.
    :raises AssertionError: if the operands differ in length.
    """
    if len(a) != len(b):
        raise AssertionError('Operands to nancov must have same size')

    required = 1 if min_periods is None else min_periods

    # Keep only the positions where both operands hold a value.
    keep = notnull(a) & notnull(b)
    if not keep.all():
        a, b = a[keep], b[keep]

    if len(a) < required:
        return np.nan
    return np.cov(a, b)[0, 1]
test_window.py 文件源码
项目:PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda
作者: SignalMedia
项目源码
文件源码
阅读 23
收藏 0
点赞 0
评论 0
def test_flex_binary_frame(self):
    """Rolling `corr`/`cov` between a Series and a DataFrame, and between
    two DataFrames, agree with the column-by-column computation."""

    def _rolling(obj, name, other):
        # Call the rolling(window=10) method `name` of `obj` against `other`.
        return getattr(obj.rolling(window=10), name)(other)

    for name in ['corr', 'cov']:
        series = self.frame[1]

        # Series against frame and frame against series must both equal
        # applying the series computation to every column.
        series_vs_frame = _rolling(series, name, self.frame)
        frame_vs_series = _rolling(self.frame, name, series)
        expected = self.frame.apply(
            lambda col: _rolling(series, name, col))
        tm.assert_frame_equal(series_vs_frame, expected)
        tm.assert_frame_equal(frame_vs_series, expected)

        # Frame against an independent random frame of the same shape.
        other = self.frame.copy()
        other.values[:] = np.random.randn(*other.shape)

        frame_vs_frame = _rolling(self.frame, name, other)
        expected = DataFrame({k: _rolling(self.frame[k], name, other[k])
                              for k in self.frame})
        tm.assert_frame_equal(frame_vs_frame, expected)
test_window.py 文件源码
项目:PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda
作者: SignalMedia
项目源码
文件源码
阅读 25
收藏 0
点赞 0
评论 0
def test_expanding_cov_diff_index(self):
    """Expanding covariance of two Series whose indexes differ (GH 7512)."""
    # GH 7512
    left = Series([1, 2, 3], index=[0, 1, 2])
    expected = Series([None, None, 2.0])

    # A missing label and an explicit missing value must behave the same.
    missing_label = Series([1, 3], index=[0, 2])
    assert_series_equal(left.expanding().cov(missing_label), expected)

    missing_value = Series([1, None, 3], index=[0, 1, 2])
    assert_series_equal(left.expanding().cov(missing_value), expected)

    # Both sides lack a label that the other one has.
    left = Series([7, 8, 10], index=[0, 1, 3])
    right = Series([7, 9, 10], index=[0, 2, 3])
    expected = Series([None, None, None, 4.5])
    assert_series_equal(left.expanding().cov(right), expected)
test_window.py 文件源码
项目:PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda
作者: SignalMedia
项目源码
文件源码
阅读 24
收藏 0
点赞 0
评论 0
def test_expanding_cov_pairwise_diff_length(self):
    """Pairwise expanding covariance of frames of different length (GH 7512).

    Each frame exists in two forms: one with a row of missing values
    (``df2``) or a full middle row (``df1``), and one where that row is
    absent from the index (``df1a``, ``df2a``). All four combinations must
    give the same result at label 2.
    """
    # GH 7512
    df1 = DataFrame([[1, 5], [3, 2], [3, 9]], columns=['A', 'B'])
    df1a = DataFrame([[1, 5], [3, 9]], index=[0, 2], columns=['A', 'B'])
    df2 = DataFrame([[5, 6], [None, None], [2, 1]], columns=['X', 'Y'])
    df2a = DataFrame([[5, 6], [2, 1]], index=[0, 2], columns=['X', 'Y'])

    # result1 previously repeated the df1/df2a case, so the df1/df2
    # combination was never exercised.
    result1 = df1.expanding().cov(df2, pairwise=True)[2]
    result2 = df1.expanding().cov(df2a, pairwise=True)[2]
    result3 = df1a.expanding().cov(df2, pairwise=True)[2]
    result4 = df1a.expanding().cov(df2a, pairwise=True)[2]

    expected = DataFrame([[-3., -5.], [-6., -10.]], index=['A', 'B'],
                         columns=['X', 'Y'])
    assert_frame_equal(result1, expected)
    assert_frame_equal(result2, expected)
    assert_frame_equal(result3, expected)
    assert_frame_equal(result4, expected)
test_extras.py 文件源码
项目:PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda
作者: SignalMedia
项目源码
文件源码
阅读 24
收藏 0
点赞 0
评论 0
def test_2d_w_missing(self):
    """Masked `cov` on a 2-D array with one masked entry matches `np.cov`.

    The reference is `np.cov` on the mean-centred data with masked entries
    filled by 0, rescaled by the number of jointly valid observations.
    """
    # Test cov on 2D variable w/ missing value
    data = self.data
    data[-1] = masked
    data = data.reshape(3, 4)
    unmasked = np.logical_not(getmaskarray(data)).astype(int)

    # Variables in rows (the default).
    n_cols = data.shape[1]
    pair_counts = np.dot(unmasked, unmasked.T)
    centred = (data - data.mean(1)[:, None]).filled(0)
    assert_almost_equal(
        cov(data),
        np.cov(centred) * (n_cols - 1) / (pair_counts - 1.))
    assert_almost_equal(
        cov(data, bias=True),
        np.cov(centred, bias=True) * n_cols / pair_counts)

    # Variables in columns.
    n_rows = data.shape[0]
    pair_counts = np.dot(unmasked.T, unmasked)
    centred = (data - data.mean(0)).filled(0)
    assert_almost_equal(
        cov(data, rowvar=False),
        (np.cov(centred, rowvar=False) *
         (n_rows - 1) / (pair_counts - 1.)))
    assert_almost_equal(
        cov(data, rowvar=False, bias=True),
        (np.cov(centred, rowvar=False, bias=True) *
         n_rows / pair_counts))
def covariance_matrices(im, labels, return_mm3=True):
    """
    Considers each label as a point distribution in space, and returns the
    covariance matrix of each point distribution.

    A label with fewer than two voxels has no defined covariance and yields
    a 3x3 matrix of NaN.

    :param im: input nibabel image (``get_data()`` and ``affine`` are used).
    :param labels: list of labels input.
    :param return_mm3: if True each matrix is left-multiplied by the upper
        3x3 block of ``im.affine``; if False it is in voxel indexes.
    :return: list with one covariance matrix per label, in the order of
        ``labels``.
    """
    # Read the volume once rather than once per label.
    data = im.get_data()

    cov_matrices = []
    for l in labels:
        coords = np.where(data == l)  # returns (X_vector, Y_vector, Z_vector)
        # Test the number of points, not the coordinate values: a voxel at
        # index 0 is still a point. With fewer than two points np.cov only
        # produces NaN (plus a RuntimeWarning), so return NaN directly.
        if coords[0].size > 1:
            cov_matrices.append(np.cov(coords))
        else:
            cov_matrices.append(np.nan * np.ones([3, 3]))

    if return_mm3:
        # NOTE(review): a covariance transforms as A.C.A^T under a linear map
        # A; only A.C is applied here. Kept as is for compatibility --
        # confirm the intended convention with the callers.
        linear_part = im.affine[:3, :3]
        cov_matrices = [linear_part.dot(cm.astype(np.float64))
                        for cm in cov_matrices]
    return cov_matrices
def correlation(task, load=True):
    """Print the Gaussian total correlation of the 'log_network' activations.

    The last tensor in the 'log_network' collection is evaluated on every
    'valid' minibatch of the module-level ``mytask``; the outputs are
    stacked, flattened per sample, and their covariance is turned into a
    total-correlation estimate.

    :param task: mapping; only ``task['initial_keep_prob']`` is read.
    :param load: if True, ``mytask.initialize`` is called first with
        ``_load=True``.
    :return: None; the result is printed.
    """
    self = mytask
    if load:
        self.initialize(_load=True, _logging=False, _log_dir='other/')

    # The tensor to evaluate does not change between batches.
    z = tf.get_collection('log_network')[-1]

    data = []
    for batch in self.iterate_minibatches('valid'):
        xtrain, ytrain = batch
        # One-hot encode the labels over 10 classes.
        ytrain = np.eye(10)[ytrain]
        feed_dict = {self.x: xtrain,
                     self.y: ytrain,
                     self.sigma0: 1.,
                     self.initial_keep_prob: task['initial_keep_prob'],
                     self.is_training: False}
        batch_z = self.sess.run(z, feed_dict)
        data.append(batch_z)
    data = np.vstack(data)
    data = data.reshape(data.shape[0], -1)

    def normal_tc(c0):
        # -0.5 * log det of the covariance rescaled by its inverse
        # diagonal, divided by the number of dimensions.
        c1i = np.diag(1. / np.diag(c0))
        p = np.matmul(c1i, c0)
        return - .5 * np.linalg.slogdet(p)[1] / c0.shape[0]

    c0 = np.cov(data, rowvar=False)
    tc = normal_tc(c0)
    # Parenthesised form prints the same text on Python 2 and Python 3.
    print("Total correlation: %f" % tc)
def test_1d_w_missing(self):
    """Masked `cov` on 1-D data with a masked entry matches `np.cov` on the
    unmasked values, and `allow_masked=False` rejects masked input."""
    # Test cov 1 1D variable w/missing values
    x = self.data
    x[-1] = masked
    x -= x.mean()
    nx = x.compressed()
    assert_almost_equal(np.cov(nx), cov(x))
    assert_almost_equal(np.cov(nx, rowvar=False), cov(x, rowvar=False))
    assert_almost_equal(np.cov(nx, rowvar=False, bias=True),
                        cov(x, rowvar=False, bias=True))
    #
    # Masked data must be refused when allow_masked is False; fail loudly
    # if no ValueError is raised instead of passing silently.
    try:
        cov(x, allow_masked=False)
    except ValueError:
        pass
    else:
        raise AssertionError(
            "cov(x, allow_masked=False) did not raise ValueError")
    #
    # 2 1D variables w/ missing values
    nx = x[1:-1]
    assert_almost_equal(np.cov(nx, nx[::-1]), cov(x, x[::-1]))
    assert_almost_equal(np.cov(nx, nx[::-1], rowvar=False),
                        cov(x, x[::-1], rowvar=False))
    assert_almost_equal(np.cov(nx, nx[::-1], rowvar=False, bias=True),
                        cov(x, x[::-1], rowvar=False, bias=True))