def alpha(self):
# Cronbach Alpha
alpha = pd.DataFrame(0, index=np.arange(1), columns=self.latent)
for i in range(self.lenlatent):
block = self.data_[self.Variables['measurement']
[self.Variables['latent'] == self.latent[i]]]
p = len(block.columns)
if(p != 1):
p_ = len(block)
correction = np.sqrt((p_ - 1) / p_)
soma = np.var(np.sum(block, axis=1))
cor_ = pd.DataFrame.corr(block)
denominador = soma * correction**2
numerador = 2 * np.sum(np.tril(cor_) - np.diag(np.diag(cor_)))
alpha_ = (numerador / denominador) * (p / (p - 1))
alpha[self.latent[i]] = alpha_
else:
alpha[self.latent[i]] = 1
return alpha.T
python类var()的实例源码
def __call__(self, *inputvals):
assert len(inputvals) == len(self.nondata_inputs) + len(self.data_inputs)
nondata_vals = inputvals[0:len(self.nondata_inputs)]
data_vals = inputvals[len(self.nondata_inputs):]
feed_dict = dict(zip(self.nondata_inputs, nondata_vals))
n = data_vals[0].shape[0]
for v in data_vals[1:]:
assert v.shape[0] == n
for i_start in range(0, n, self.batch_size):
slice_vals = [v[i_start:min(i_start+self.batch_size, n)] for v in data_vals]
for (var,val) in zip(self.data_inputs, slice_vals):
feed_dict[var]=val
results = tf.get_default_session().run(self.outputs, feed_dict=feed_dict)
if i_start==0:
sum_results = results
else:
for i in range(len(results)):
sum_results[i] = sum_results[i] + results[i]
for i in range(len(results)):
sum_results[i] = sum_results[i] / n
return sum_results
# ================================================================
# Modules
# ================================================================
def effective_sample_size(x, mu, var, logger):
"""
Calculate the effective sample size of sequence generated by MCMC.
:param x:
:param mu: mean of the variable
:param var: variance of the variable
:param logger: logg
:return: effective sample size of the sequence
Make sure that `mu` and `var` are correct!
"""
# batch size, time, dimension
b, t, d = x.shape
ess_ = np.ones([d])
for s in range(1, t):
p = auto_correlation_time(x, s, mu, var)
if np.sum(p > 0.05) == 0:
break
else:
for j in range(0, d):
if p[j] > 0.05:
ess_[j] += 2.0 * p[j] * (1.0 - float(s) / t)
logger.info('ESS: max [%f] min [%f] / [%d]' % (t / np.min(ess_), t / np.max(ess_), t))
return t / ess_
def bn_hat_z_layers(self, hat_z_layers, z_pre_layers):
# TODO: Calculate batchnorm using GPU Tensors.
assert len(hat_z_layers) == len(z_pre_layers)
hat_z_layers_normalized = []
for i, (hat_z, z_pre) in enumerate(zip(hat_z_layers, z_pre_layers)):
if self.use_cuda:
ones = Variable(torch.ones(z_pre.size()[0], 1).cuda())
else:
ones = Variable(torch.ones(z_pre.size()[0], 1))
mean = torch.mean(z_pre, 0)
noise_var = np.random.normal(loc=0.0, scale=1 - 1e-10, size=z_pre.size())
if self.use_cuda:
var = np.var(z_pre.data.cpu().numpy() + noise_var, axis=0).reshape(1, z_pre.size()[1])
else:
var = np.var(z_pre.data.numpy() + noise_var, axis=0).reshape(1, z_pre.size()[1])
var = Variable(torch.FloatTensor(var))
if self.use_cuda:
hat_z = hat_z.cpu()
ones = ones.cpu()
mean = mean.cpu()
hat_z_normalized = torch.div(hat_z - ones.mm(mean), ones.mm(torch.sqrt(var + 1e-10)))
if self.use_cuda:
hat_z_normalized = hat_z_normalized.cuda()
hat_z_layers_normalized.append(hat_z_normalized)
return hat_z_layers_normalized
def test_ddof_too_big(self):
nanfuncs = [np.nanvar, np.nanstd]
stdfuncs = [np.var, np.std]
dsize = [len(d) for d in _rdat]
for nf, rf in zip(nanfuncs, stdfuncs):
for ddof in range(5):
with warnings.catch_warnings(record=True) as w:
warnings.simplefilter('always')
tgt = [ddof >= d for d in dsize]
res = nf(_ndat, axis=1, ddof=ddof)
assert_equal(np.isnan(res), tgt)
if any(tgt):
assert_(len(w) == 1)
assert_(issubclass(w[0].category, RuntimeWarning))
else:
assert_(len(w) == 0)
def update(self, x):
batch_mean = np.mean(x, axis=0)
batch_var = np.var(x, axis=0)
batch_count = x.shape[0]
delta = batch_mean - self.mean
tot_count = self.count + batch_count
new_mean = self.mean + delta * batch_count / tot_count
m_a = self.var * (self.count)
m_b = batch_var * (batch_count)
M2 = m_a + m_b + np.square(delta) * self.count * batch_count / (self.count + batch_count)
new_var = M2 / (self.count + batch_count)
new_count = batch_count + self.count
self.mean = new_mean
self.var = new_var
self.count = new_count
def test_runningmeanstd():
for (x1, x2, x3) in [
(np.random.randn(3), np.random.randn(4), np.random.randn(5)),
(np.random.randn(3,2), np.random.randn(4,2), np.random.randn(5,2)),
]:
rms = RunningMeanStd(epsilon=0.0, shape=x1.shape[1:])
x = np.concatenate([x1, x2, x3], axis=0)
ms1 = [x.mean(axis=0), x.var(axis=0)]
rms.update(x1)
rms.update(x2)
rms.update(x3)
ms2 = [rms.mean, rms.var]
assert np.allclose(ms1, ms2)
def autocorrelate(signal, lag=1):
"""Gives the correlation coefficient for the signal's correlation with itself.
Args:
signal: The signal on which to compute the autocorrelation. Can be a list.
lag: The offset at which to correlate the signal with itself. E.g. if lag
is 1, will compute the correlation between the signal and itself 1 beat
later.
Returns:
Correlation coefficient.
"""
n = len(signal)
x = np.asarray(signal) - np.mean(signal)
c0 = np.var(signal)
return (x[lag:] * x[:n - lag]).sum() / float(n) / c0
def effective_sample_size_1d(samples):
"""
Compute the effective sample size of a chain of scalar samples.
:param samples: A 1-D numpy array. The chain of samples.
:return: A float. The effective sample size.
"""
n = samples.shape[0]
mu_hat = np.mean(samples)
var = np.var(samples) * n / (n - 1)
var_plus = var * (n - 1) / n
def auto_covariance(lag):
return np.mean((samples[:n - lag] - mu_hat) * (samples[lag:] - mu_hat))
sum_rho = 0
for t in range(0, n):
rho = 1 - (var - auto_covariance(t)) / var_plus
if rho < 0:
break
sum_rho += rho
ess = n / (1 + 2 * sum_rho)
return ess
def __init__(self, kernel='rbf', nbases=50, lenscale=1., var=1.,
regulariser=1., ard=True, maxiter=3000, batch_size=10,
alpha=0.01, beta1=0.9, beta2=0.99, epsilon=1e-8,
random_state=None, nstarts=500):
super().__init__(likelihood=Gaussian(Parameter(var, Positive())),
basis=None,
maxiter=maxiter,
batch_size=batch_size,
updater=Adam(alpha, beta1, beta2, epsilon),
random_state=random_state,
nstarts=nstarts
)
self._store_params(kernel, regulariser, nbases, lenscale, ard)
# Bespoke regressor for basin-depth problems
def __init__(self, kernel='rbf', nbases=50, lenscale=1., var=1.,
falloff=1., regulariser=1., ard=True,
indicator_field='censored', maxiter=3000,
batch_size=10, alpha=0.01, beta1=0.9,
beta2=0.99, epsilon=1e-8, random_state=None):
lhood = Switching(lenscale=falloff,
var_init=Parameter(var, Positive()))
super().__init__(likelihood=lhood,
basis=None,
maxiter=maxiter,
batch_size=batch_size,
updater=Adam(alpha, beta1, beta2, epsilon),
random_state=random_state
)
self.indicator_field = indicator_field
self._store_params(kernel, regulariser, nbases, lenscale, ard)
def test_final_variance_runs(self):
exp = VarianceExperiment()
printer_final = Print(name="Final")
avgr = Averager('repeats', name="TestAverager")
var_buff = DataBuffer(name='Variance Buffer')
mean_buff = DataBuffer(name='Mean Buffer')
edges = [(exp.chan1, avgr.sink),
(avgr.final_variance, printer_final.sink),
(avgr.final_variance, var_buff.sink),
(avgr.final_average, mean_buff.sink)]
exp.set_graph(edges)
exp.run_sweeps()
var_data = var_buff.get_data()['Variance'].reshape(var_buff.descriptor.data_dims())
mean_data = mean_buff.get_data()['chan1'].reshape(mean_buff.descriptor.data_dims())
orig_data = exp.vals.reshape(exp.chan1.descriptor.data_dims())
self.assertTrue(np.abs(np.sum(mean_data - np.mean(orig_data, axis=0))) <= 1e-3)
self.assertTrue(np.abs(np.sum(var_data - np.var(orig_data, axis=0, ddof=1))) <= 1e-3)
def regularize(features):
#regularize per column
for i in range(0,len(features[0])):
try:
#take evary column
feat=features[:,i]
#mean and variance of every column
mean=np.mean(feat)
var=np.var(feat)
if(var!=0):
features[:,i]=(features[:,i]-mean)/float(3*var)
else :
features[:,i]=0
except:
pass
features[features>1]=1
features[features<-1]=-1
return features
#reguralize features to [-1,1] horizontally, yi=yi/norm(yi,2)
def test_variance_wgrad(input_tensor):
inputs = input_tensor
targets = ng.placeholder(inputs.axes)
inp_stat = ng.variance(inputs, reduction_axes=inputs.axes.batch_axes())
err = ng.sum(inp_stat - targets, out_axes=())
d_inputs = ng.deriv(err, inputs)
with executor([err, d_inputs], inputs, targets) as comp_func:
input_value = rng.uniform(-0.1, 0.1, inputs.axes)
target_value = rng.uniform(-0.1, 0.1, targets.axes)
ng_f_res, ng_b_res = comp_func(input_value, target_value)
np_f_res = np.sum(np.var(input_value, axis=1, keepdims=True) - target_value)
ng.testing.assert_allclose(np_f_res, ng_f_res, atol=1e-4, rtol=1e-4)
np_b_res = 2 * (input_value - np.mean(input_value, axis=1, keepdims=True))
ng.testing.assert_allclose(np_b_res, ng_b_res, atol=1e-4, rtol=1e-4)
def calculateParamGV(feat_file_list, feat_dim=32):
data = numpy.empty((1, feat_dim))
for file_index in range(len(feat_file_list)):
file_name = feat_file_list[file_index]
(junk, ext) = feat_file_list[file_index].split('.')
features = readBottleneckFeatures(file_name, feat_dim)
if ext == 'lf0': #remove unvoiced values
features = features[numpy.where(features != -1.*(10**(10)))[0]]
features = numpy.exp(features) #convert to linear scale
if file_index==0:
data=features
else:
data=numpy.concatenate((data,features),0)
gv = numpy.var(data, 0)
return gv
def bn_forward(X, gamma, beta, cache, momentum=.9, train=True):
running_mean, running_var = cache
if train:
mu = np.mean(X, axis=0)
var = np.var(X, axis=0)
X_norm = (X - mu) / np.sqrt(var + c.eps)
out = gamma * X_norm + beta
cache = (X, X_norm, mu, var, gamma, beta)
running_mean = util.exp_running_avg(running_mean, mu, momentum)
running_var = util.exp_running_avg(running_var, var, momentum)
else:
X_norm = (X - running_mean) / np.sqrt(running_var + c.eps)
out = gamma * X_norm + beta
cache = None
return out, cache, running_mean, running_var
def bn_backward(dout, cache):
X, X_norm, mu, var, gamma, beta = cache
N, D = X.shape
X_mu = X - mu
std_inv = 1. / np.sqrt(var + c.eps)
dX_norm = dout * gamma
dvar = np.sum(dX_norm * X_mu, axis=0) * -.5 * std_inv**3
dmu = np.sum(dX_norm * -std_inv, axis=0) + dvar * np.mean(-2. * X_mu, axis=0)
dX = (dX_norm * std_inv) + (dvar * 2 * X_mu / N) + (dmu / N)
dgamma = np.sum(dout * X_norm, axis=0)
dbeta = np.sum(dout, axis=0)
return dX, dgamma, dbeta
def cv_gradient(self,z):
"""
The control variate augmented Monte Carlo gradient estimate
"""
gradient = np.zeros(np.sum(self.approx_param_no))
z_t = z.T
log_q = self.normal_log_q(z.T)
log_p = self.log_p(z.T)
grad_log_q = self.grad_log_q(z)
gradient = grad_log_q*(log_p-log_q)
alpha0 = alpha_recursion(np.zeros(np.sum(self.approx_param_no)), grad_log_q, gradient, np.sum(self.approx_param_no))
vectorized = gradient - ((alpha0/np.var(grad_log_q,axis=1))*grad_log_q.T).T
return np.mean(vectorized,axis=1)
def cv_gradient_initial(self,z):
"""
The control variate augmented Monte Carlo gradient estimate
"""
gradient = np.zeros(np.sum(self.approx_param_no))
z_t = z.T
log_q = self.normal_log_q_initial(z.T)
log_p = self.log_p(z.T)
grad_log_q = self.grad_log_q(z)
gradient = grad_log_q*(log_p-log_q)
alpha0 = alpha_recursion(np.zeros(np.sum(self.approx_param_no)), grad_log_q, gradient, np.sum(self.approx_param_no))
vectorized = gradient - ((alpha0/np.var(grad_log_q,axis=1))*grad_log_q.T).T
return np.mean(vectorized,axis=1)
def cv_gradient(self, z):
"""
The control variate augmented Monte Carlo gradient estimate
RAO-BLACKWELLIZED!
"""
z_t = np.transpose(z)
log_q = self.normal_log_q(z_t)
log_p = self.log_p(z_t)
grad_log_q = self.grad_log_q(z)
gradient = grad_log_q*np.repeat((log_p - log_q).T,2,axis=0)
alpha0 = alpha_recursion(np.zeros(np.sum(self.approx_param_no)), grad_log_q, gradient, np.sum(self.approx_param_no))
vectorized = gradient - ((alpha0/np.var(grad_log_q,axis=1))*grad_log_q.T).T
return np.mean(vectorized,axis=1)
def cv_gradient_initial(self,z):
"""
The control variate augmented Monte Carlo gradient estimate
RAO-BLACKWELLIZED!
"""
z_t = np.transpose(z)
log_q = self.normal_log_q_initial(z_t)
log_p = self.log_p(z_t)
grad_log_q = self.grad_log_q(z)
gradient = grad_log_q*np.repeat((log_p - log_q).T,2,axis=0)
alpha0 = alpha_recursion(np.zeros(np.sum(self.approx_param_no)), grad_log_q, gradient, np.sum(self.approx_param_no))
vectorized = gradient - ((alpha0/np.var(grad_log_q,axis=1))*grad_log_q.T).T
return np.mean(vectorized,axis=1)
def testRandom(self):
ig = InverseGaussian(1., 1.)
samples = ig.random(1000000)
mu = numpy.mean(samples)
var = numpy.var(samples)
self.assertAlmostEqual(ig.mu, mu, delta=1e-1)
self.assertAlmostEqual(ig.mu ** 3 / ig.shape, var, delta=1e-1)
ig = InverseGaussian(3., 6.)
samples = ig.random(1000000)
mu = numpy.mean(samples)
var = numpy.var(samples)
self.assertAlmostEqual(ig.mu, mu, delta=1e-1)
self.assertAlmostEqual(ig.mu ** 3 / ig.shape, var, delta=5e-1)
def testRandom(self):
from scipy.special import kv
from numpy import sqrt
a = 2.
b = 1.
p = 1
gig = GeneralizedInverseGaussian(a, b, p)
samples = gig.random(10000)
mu_analytical = sqrt(b) * kv(p + 1, sqrt(a * b)) / (sqrt(a) * kv(p, sqrt(a * b)))
var_analytical = b * kv(p + 2, sqrt(a * b)) / a / kv(p, sqrt(a * b)) - mu_analytical ** 2
mu = numpy.mean(samples)
var = numpy.var(samples)
self.assertAlmostEqual(mu_analytical, mu, delta=1e-1)
self.assertAlmostEqual(var_analytical, var, delta=1e-1)
calculate_aggregate_statistics.py 文件源码
项目:tbp-next-basket
作者: GiulioRossetti
项目源码
文件源码
阅读 34
收藏 0
点赞 0
评论 0
def calculate_aggregate(values):
agg_measures = {
'avg': np.mean(values),
'std': np.std(values),
'var': np.var(values),
'med': np.median(values),
'10p': np.percentile(values, 10),
'25p': np.percentile(values, 25),
'50p': np.percentile(values, 50),
'75p': np.percentile(values, 75),
'90p': np.percentile(values, 90),
'iqr': np.percentile(values, 75) - np.percentile(values, 25),
'iqm': interquartile_range_mean(values),
'mad': mean_absolute_deviation(values),
'cov': 1.0 * np.mean(values) / np.std(values),
'gin': gini_coefficient(values),
'skw': stats.skew(values),
'kur': stats.kurtosis(values),
'sum': np.sum(values)
}
return agg_measures
def test_sample_contexts_from_distribution():
env = Catapult(segments=[(0, 0), (20, 0)], context_interval=(0, 20),
context_distribution=uniform(5, 10), random_state=0)
env.init()
contexts = np.empty(1000)
for i in range(contexts.shape[0]):
context = env.request_context(None)
contexts[i] = context[0]
norm_dist = uniform(0.25, 0.5)
assert_true(np.all(0.25 <= contexts))
assert_true(np.all(contexts <= 0.75))
mean, var = norm_dist.stats("mv")
assert_almost_equal(np.mean(contexts), mean, places=1)
assert_almost_equal(np.var(contexts), var, places=1)
def displayDataset(self, dataset):
eps = 0.00001
linewidth = dataset.linewidth
if np.var(dataset.values) < eps:
linewidth += 2
mean = np.mean(dataset.values)
x = np.arange(0, 1, 0.1)
x = np.sort(np.append(x, [mean, mean-eps, mean+eps]))
density = [1 if v == mean else 0 for v in x]
else:
self.kde.fit(np.asarray([[x] for x in dataset.values]))
## Computes the x axis
x_max = np.amax(dataset.values)
x_min = np.amin(dataset.values)
delta = x_max - x_min
density_delta = 1.1 * delta
x = np.arange(x_min, x_max, density_delta / self.num_points)
x_density = [[y] for y in x]
## kde.score_samples returns the 'log' of the density
log_density = self.kde.score_samples(x_density).tolist()
density = map(math.exp, log_density)
self.ax.plot(x, density, label = dataset.label, color = dataset.color,
linewidth = linewidth, linestyle = dataset.linestyle)
def snr(vref, vcmp):
"""
Compute Signal to Noise Ratio (SNR) of two images.
Parameters
----------
vref : array_like
Reference image
vcmp : array_like
Comparison image
Returns
-------
x : float
SNR of `vcmp` with respect to `vref`
"""
dv = np.var(vref)
with np.errstate(divide='ignore'):
rt = dv/mse(vref, vcmp)
return 10.0*np.log10(rt)
def bsnr(vblr, vnsy):
"""
Compute Blurred Signal to Noise Ratio (BSNR) for a blurred and noisy
image.
Parameters
----------
vblr : array_like
Blurred noise free image
vnsy : array_like
Blurred image with additive noise
Returns
-------
x : float
BSNR of `vnsy` with respect to `vblr` and `vdeg`
"""
blrvar = np.var(vblr)
nsevar = np.var(vnsy - vblr)
with np.errstate(divide='ignore'):
rt = blrvar/nsevar
return 10.0*np.log10(rt)
def forward_pass(self, X, training=True):
# Initialize running mean and variance if first run
if self.running_mean is None:
self.running_mean = np.mean(X, axis=0)
self.running_var = np.var(X, axis=0)
if training:
mean = np.mean(X, axis=0)
var = np.var(X, axis=0)
self.X_centered = X - mean
self.stddev_inv = 1 / np.sqrt(var + self.eps)
self.running_mean = self.momentum * self.running_mean + (1 - self.momentum) * mean
self.running_var = self.momentum * self.running_var + (1 - self.momentum) * var
else:
mean = self.running_mean
var = self.running_var
X_norm = (X - mean) / np.sqrt(var + self.eps)
output = self.gamma * X_norm + self.beta
return output
def Gelman_Rubin(sampled_parameters):
nsamples = len(sampled_parameters[0])
nchains = len(sampled_parameters)
nburnin = nsamples//2
chain_var = [np.var(sampled_parameters[chain][nburnin:,:], axis=0) for chain in range(nchains)]
W = np.mean(chain_var, axis=0)
chain_means = [np.mean(sampled_parameters[chain][nburnin:,:], axis=0) for chain in range(nchains)]
B = np.var(chain_means, axis=0)
var_est = (W*(1-(1./nsamples))) + B
Rhat = np.sqrt(np.divide(var_est, W))
return Rhat