def _logcdf(self, samples):
lower = np.full(2, -np.inf)
upper = norm.ppf(samples)
limit_flags = np.zeros(2)
if upper.shape[0] > 0:
def func1d(upper1d):
'''
Calculates the multivariate normal cumulative distribution
function of a single sample.
'''
return mvn.mvndst(lower, upper1d, limit_flags, self.theta)[1]
vals = np.apply_along_axis(func1d, -1, upper)
else:
vals = np.empty((0, ))
old_settings = np.seterr(divide='ignore')
vals = np.log(vals)
np.seterr(**old_settings)
vals[np.any(samples == 0.0, axis=1)] = -np.inf
vals[samples[:, 0] == 1.0] = np.log(samples[samples[:, 0] == 1.0, 1])
vals[samples[:, 1] == 1.0] = np.log(samples[samples[:, 1] == 1.0, 0])
return vals
def bias_corrected_ci(true_coeff, samples, conf=95):
"""
Return the bias-corrected bootstrap confidence interval, using the method from the book.
:param true_coeff: The estimates in the original sample
:param samples: The bootstrapped estimates
:param conf: The level of the desired confidence interval
:return: The bias-corrected LLCI and ULCI for the bootstrapped estimates.
"""
ptilde = (samples < true_coeff).mean()
Z = norm.ppf(ptilde)
Zci = z_score(conf)
Zlow, Zhigh = -Zci + 2 * Z, Zci + 2 * Z
    plow, phigh = norm.cdf(Zlow), norm.cdf(Zhigh)
llci = np.percentile(samples, plow * 100, interpolation='lower')
ulci = np.percentile(samples, phigh * 100, interpolation='higher')
return llci, ulci
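# A minimal usage sketch of bias_corrected_ci (assumes numpy as np, scipy.stats.norm,
# and the z_score helper defined further below are in scope; the statistic being
# bootstrapped here, the sample mean, is purely illustrative):
import numpy as np
rng = np.random.default_rng(0)
data = rng.normal(loc=1.0, scale=2.0, size=200)
boot_means = np.array([rng.choice(data, size=data.size).mean() for _ in range(2000)])
llci, ulci = bias_corrected_ci(data.mean(), boot_means, conf=95)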
def dprime(confusion_matrix):
"""
Function takes in a 2x2 confusion matrix and returns the d-prime value for the predictions.
d' = z(hit rate)-z(false alarm rate)
http://en.wikipedia.org/wiki/D'
"""
if max(confusion_matrix.shape) > 2:
return False
else:
hit_rate = confusion_matrix[0, 0] / confusion_matrix[0, :].sum()
fa_rate = confusion_matrix[1, 0] / confusion_matrix[1, :].sum()
nudge = 0.0001
if (hit_rate >= 1): hit_rate = 1 - nudge
if (hit_rate <= 0): hit_rate = 0 + nudge
if (fa_rate >= 1): fa_rate = 1 - nudge
if (fa_rate <= 0): fa_rate = 0 + nudge
dp = norm.ppf(hit_rate)-norm.ppf(fa_rate)
return dp
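# Quick check of dprime on a toy 2x2 confusion matrix (assumption: rows are the
# true classes and column 0 holds "signal" responses, so row 0 yields the hit
# rate and row 1 the false-alarm rate):
import numpy as np
cm = np.array([[40, 10],
               [15, 35]])
print(dprime(cm))  # norm.ppf(0.8) - norm.ppf(0.3), roughly 1.37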
# accuracy (% correct)
def faureF(dim,nsims):
array=np.empty((dim,nsims))
p=2
Prime=[2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53,\
59, 61, 67, 71, 73, 79, 83, 89, 97, 101, 103, 107, 109, 113,\
127, 131, 137, 139, 149, 151, 157, 163, 167, 173, 179, 181,\
191, 193, 197, 199, 211, 223, 227, 229, 233, 239, 241, 251,\
257, 263, 269, 271, 277, 281]
i=0
for i in range(0,nsims,1):
array[0,i]=sumDigits(i,2)
if dim==1:
"Si la dimension est égale à 1, on prend 2 comme base"
return array
else:
        i = 0
        # Advance to the smallest prime >= dim to use as the base.
        while p < dim:
            i += 1
            p = Prime[i]
j=0
s=1
for s in range(1,dim,1):
for j in range(0,nsims,1):
array[s,j]=np.sum(toDigits3(j,p,s+1))
return norm.ppf(array)
def backward_transform(self, X):
assert len(self.data["cols"]) == X.shape[1], "Backward Transform Error"
if(self.algo == "min-max"):
return X*(self.data["max"]-self.data["min"])+self.data["min"]
elif(self.algo == "normal"):
return X*self.data["std"]+self.data["mu"]
elif(self.algo == "inv-normal"):
return (norm.ppf(X)-self.data["mu"])/self.data["std"]
elif(self.algo == "auto-normal"):
lm = self.data['boxcox'][None, :]
inv_boxcox = lambda x: np.sign(x*lm+1)*np.abs(x*lm+1)**(1./lm)
tX = X*self.data["std"]+self.data["mu"]
return inv_boxcox(tX)*(self.data["max"]-self.data["min"])+self.data["min"]
elif(self.algo == "auto-inv-normal"):
lm = self.data['boxcox'][None, :]
inv_boxcox = lambda x: np.sign(x*lm+1)*np.abs(x*lm+1)**(1./lm)
tX = norm.ppf(X, self.data["mu"], self.data["std"])
return inv_boxcox(tX)*(self.data["max"]-self.data["min"])+self.data["min"]
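# Sanity check of the inverse Box-Cox expression used in the lambdas above
# (a sketch assuming strictly positive data and a nonzero fitted lambda):
import numpy as np
from scipy.stats import boxcox
y = np.array([1.0, 2.0, 5.0])
t, lm = boxcox(y)                                        # t = (y**lm - 1) / lm
y_back = np.sign(t * lm + 1) * np.abs(t * lm + 1) ** (1.0 / lm)
print(np.allclose(y_back, y))                            # True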
def frequency_test(generator, n_bits, sig_level=None, misc=None, n1=None):
if n1 is None:
n0, n1 = _calculate_n0_n1(generator, n_bits)
else:
n0 = n_bits - n1
x1 = ((n0 - n1) ** 2) / n_bits
p_value = erfc(sqrt(x1 / 2))
if type(misc) is dict:
misc.update(n0=n0, n1=n1, p_value=p_value)
if sig_level is None:
return x1
else:
limit = chi2.ppf(1 - sig_level, 1)
if type(misc) is dict:
misc.update(x=x1, limit=limit)
return x1 <= limit
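# Example run of the monobit frequency test without a generator: passing n1
# directly skips _calculate_n0_n1, so only the bit counts matter (here 512 ones
# out of 1000 bits).
misc = {}
ok = frequency_test(None, 1000, sig_level=0.05, misc=misc, n1=512)
print(ok, misc['p_value'])  # True, with a p-value well above 0.05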
def norm_std_from_iqr(mu, iqr):
"""Computes the standard deviation of a normal distribution with mean mu and IQR irq."""
# Assuming normal distribution (so symmetric), Q1 = m - (iqr / 2)
Q1 = mu - (iqr / 2.0)
# Assuming a normal distribution with mean m, std s, and first quartile Q1,
# we have (Q1 - m)/s = ppf(0.25)
return (Q1 - mu) / norm.ppf(0.25)
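# Sanity check: a standard normal has an IQR of about 1.349, so the recovered
# standard deviation should be close to 1.
print(norm_std_from_iqr(0.0, 1.349))  # ~1.0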
def tick_values(self, vmin, vmax) :
eps = 0.
return norm.cdf(AutoLocator.tick_values( self , norm.ppf( max(eps,vmin) ) , norm.ppf(min(1-eps,vmax)) ))
def transform_non_affine(self, a):
res = np.zeros( len(a) )
goodId = np.where( (a<=1.) & (a>=0.) )[0]
res[ goodId ] = norm.ppf(a[goodId] )
return res
def rvs(self, size=1):
'''
Generates random variates from the mixed vine. Currently assumes a
c-vine structure.
Parameters
----------
size : integer, optional
The number of samples to generate. (Default: 1)
Returns
-------
array_like
n-by-d matrix of samples where n is the number of samples and d
is the number of marginals.
'''
if self.is_root_layer():
# Determine distribution dimension
layer = self
while not layer.is_marginal_layer():
layer = layer.input_layer
dim = len(layer.marginals)
samples = np.random.random(size=[size, dim])
(samples, _) = self.make_dependent(samples)
# Use marginals to transform dependent uniform samples
for i, marginal in enumerate(layer.marginals):
samples[:, i] = marginal.ppf(samples[:, i])
return samples
else:
return self.output_layer.rvs(size)
def entropy(self, alpha=0.05, sem_tol=1e-3, mc_size=1000):
'''
Estimates the entropy of the mixed vine.
Parameters
----------
alpha : float, optional
Significance level of the entropy estimate. (Default: 0.05)
sem_tol : float, optional
Maximum standard error as a stopping criterion. (Default: 1e-3)
mc_size : integer, optional
Number of samples that are drawn in each iteration of the Monte
Carlo estimation. (Default: 1000)
Returns
-------
ent : float
Estimate of the mixed vine entropy in bits.
sem : float
Standard error of the mixed vine entropy estimate in bits.
'''
# Gaussian confidence interval for sem_tol and level alpha
conf = norm.ppf(1 - alpha)
sem = np.inf
ent = 0.0
var_sum = 0.0
k = 0
while sem >= sem_tol:
# Generate samples
samples = self.rvs(mc_size)
logp = self.logpdf(samples)
log2p = logp[np.isfinite(logp)] / np.log(2)
k += 1
# Monte-Carlo estimate of entropy
ent += (-np.mean(log2p) - ent) / k
# Estimate standard error
var_sum += np.sum((-log2p - ent) ** 2)
sem = conf * np.sqrt(var_sum / (k * mc_size * (k * mc_size - 1)))
return ent, sem
def _logpdf(self, samples):
if self.theta >= 1.0:
vals = np.zeros(samples.shape[0])
vals[samples[:, 0] == samples[:, 1]] = np.inf
elif self.theta <= -1.0:
vals = np.zeros(samples.shape[0])
vals[samples[:, 0] == 1 - samples[:, 1]] = np.inf
else:
nrvs = norm.ppf(samples)
vals = 2 * self.theta * nrvs[:, 0] * nrvs[:, 1] - self.theta**2 \
* (nrvs[:, 0]**2 + nrvs[:, 1]**2)
vals /= 2 * (1 - self.theta**2)
vals -= np.log(1 - self.theta**2) / 2
return vals
def _ccdf(self, samples):
vals = np.zeros(samples.shape[0])
# Avoid subtraction of infinities
neqz = np.bitwise_and(np.any(samples > 0.0, axis=1),
np.any(samples < 1.0, axis=1))
nrvs = norm.ppf(samples[neqz, :])
vals[neqz] = norm.cdf((nrvs[:, 0] - self.theta * nrvs[:, 1])
/ np.sqrt(1 - self.theta**2))
vals[np.invert(neqz)] = norm.cdf(0.0)
return vals
def preprocess(self, x, fit=False):
"""Transform each marginal to be as close to a standard Gaussian as possible.
'standard' (default) just subtracts the mean and scales by the std.
'empirical' does an empirical gaussianization (but this cannot be inverted).
'outliers' tries to squeeze in the outliers
Any other choice will skip the transformation."""
if self.missing_values is not None:
x, self.n_obs = mean_impute(x, self.missing_values) # Creates a copy
else:
self.n_obs = len(x)
if self.gaussianize == 'none':
pass
elif self.gaussianize == 'standard':
if fit:
mean = np.mean(x, axis=0)
# std = np.std(x, axis=0, ddof=0).clip(1e-10)
std = np.sqrt(np.sum((x - mean)**2, axis=0) / self.n_obs).clip(1e-10)
self.theta = (mean, std)
x = ((x - self.theta[0]) / self.theta[1])
if np.max(np.abs(x)) > 6 and self.verbose:
print("Warning: outliers more than 6 stds away from mean. Consider using gaussianize='outliers'")
elif self.gaussianize == 'outliers':
if fit:
mean = np.mean(x, axis=0)
std = np.std(x, axis=0, ddof=0).clip(1e-10)
self.theta = (mean, std)
x = g((x - self.theta[0]) / self.theta[1]) # g truncates long tails
elif self.gaussianize == 'empirical':
print("Warning: correct inversion/transform of empirical gauss transform not implemented.")
x = np.array([norm.ppf((rankdata(x_i) - 0.5) / len(x_i)) for x_i in x.T]).T
if self.gpu and fit: # Don't return GPU matrices when only transforming
x = cm.CUDAMatrix(x)
return x
def obrien_fleming(information_fraction, alpha=0.05):
"""
Calculate an approximation of the O'Brien-Fleming alpha spending function.
Args:
information_fraction (scalar or array_like): share of the information
amount at the point of evaluation, e.g. the share of the maximum
sample size
alpha: type-I error rate
Returns:
float: redistributed alpha value at the time point with the given
information fraction
"""
return (1 - norm.cdf(norm.ppf(1 - alpha / 2) / np.sqrt(information_fraction))) * 2
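# Example: alpha spent at 25%, 50%, 75% and 100% of the planned sample size for
# an overall two-sided alpha of 0.05; the last value recovers 0.05.
import numpy as np
print(obrien_fleming(np.array([0.25, 0.5, 0.75, 1.0]), alpha=0.05))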
def compute_margin_of_error(array_1d, confidence=0.95):
"""
Compute margin of error.
Arguments:
array_1d (array):
confidence (float): 0 <= confidence <= 1
Returns:
float:
"""
return norm.ppf(q=confidence) * array_1d.std() / sqrt(array_1d.size)
def _predict(self, steps, exog, alpha):
assert 0 < alpha < 1
y = (exog if exog is not None else self._endog)[-self.results.k_ar:]
forecast = self.results.forecast(y, steps)
# FIXME: The following is adapted from statsmodels's
# VAR.forecast_interval() as the original doesn't work
q = norm.ppf(1 - alpha / 2)
sigma = np.sqrt(np.abs(np.diagonal(self.results.mse(steps), axis1=2)))
err = q * sigma
return np.asarray([forecast, forecast - err, forecast + err])
def z_score(conf):
"""
:param conf: The level of confidence desired
:return: The Z-score corresponding to the level of confidence desired.
"""
return norm.ppf((100 - (100 - conf) / 2) / 100)
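# For conf=95 this is the familiar two-sided critical value:
print(z_score(95))  # ~1.96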
def bs_delta_to_strike(fwd, delta, vol, texp):
ys = norm.ppf(delta)
return fwd/math.exp((ys - 0.5 * vol * math.sqrt(texp)) * vol * math.sqrt(texp))
def bachelier_delta_to_strike(fwd, delta, vol, texp):
return fwd - norm.ppf(delta) * vol * math.sqrt(texp)
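# Rough sanity check of both delta-to-strike conversions (assumption: delta is
# the forward call delta under the respective model, so a delta near 0.5 maps to
# a strike near the forward):
print(bs_delta_to_strike(fwd=100.0, delta=0.5, vol=0.2, texp=1.0))          # ~102.0
print(bachelier_delta_to_strike(fwd=100.0, delta=0.5, vol=20.0, texp=1.0))  # 100.0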
def generate(self, n_samples=100, batch_size=32, **kwargs):
"""
Sample new data from the generator network.
Args:
n_samples: int, the number of samples to be generated
batch_size: int, number of generated samples are once
Keyword Args:
return_probs: bool, whether the output generations should be raw probabilities or sampled Bernoulli outcomes
latent_samples: ndarray, alternative source of latent encoding, otherwise sampling will be applied
Returns:
The generated data as ndarray of shape (n_samples, data_dim)
"""
return_probs = kwargs.get('return_probs', True)
latent_samples = kwargs.get('latent_samples', None)
if latent_samples is not None:
data_iterator, n_iters = self.data_iterator.iter(latent_samples, batch_size=batch_size, mode='generation')
data_probs = self.generative_model.predict_generator(data_iterator, steps=n_iters)
else:
if self.latent_dim == 2:
# perform 2d grid search
n_samples_per_axis = complex(int(np.sqrt(n_samples)))
uniform_grid = np.mgrid[0.01:0.99:n_samples_per_axis, 0.01:0.99:n_samples_per_axis].reshape(2, -1).T
latent_samples = standard_gaussian.ppf(uniform_grid)
else:
latent_samples = np.random.standard_normal(size=(n_samples, self.latent_dim))
data_iterator, n_iters = self.data_iterator.iter(latent_samples, batch_size=batch_size, mode='generation')
data_probs = self.generative_model.predict_generator(data_iterator, steps=n_iters)
if return_probs:
return data_probs
sampled_data = np.random.binomial(1, p=data_probs)
return sampled_data
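# Standalone sketch of the 2-D latent grid built above when latent_dim == 2
# (assumption: scipy.stats.norm plays the role of standard_gaussian):
import numpy as np
from scipy.stats import norm
n_samples = 100
n_per_axis = complex(int(np.sqrt(n_samples)))
uniform_grid = np.mgrid[0.01:0.99:n_per_axis, 0.01:0.99:n_per_axis].reshape(2, -1).T
latent_samples = norm.ppf(uniform_grid)  # (100, 2) grid of latent codes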
def _transform_col(self, x, col):
"""Normalize one numerical column.
Args:
x (numpy.array): a numerical column to normalize
col (int): column index
Returns:
A normalized feature vector.
"""
return norm.ppf(self.ecdfs[col](x) * .998 + .001)
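# Standalone sketch of the same rank-Gaussian idea on a single column
# (assumes statsmodels is available for the empirical CDF):
import numpy as np
from scipy.stats import norm
from statsmodels.distributions.empirical_distribution import ECDF
x = np.random.gamma(2.0, size=1000)
z = norm.ppf(ECDF(x)(x) * .998 + .001)  # squeeze into (0, 1) to avoid +/-inf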
def vdc(n, base):
"""
    Compute the n-th number of the base-`base` Van der Corput sequence and
    return it mapped through the standard normal quantile function.
"""
vdc, denom = 0,1
while n:
denom *= base
n, remainder = divmod(n, base)
vdc += remainder / denom
return norm.ppf(vdc)
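# The first few base-2 Van der Corput points, mapped to standard normal
# quantiles; the underlying uniforms are 0.5, 0.25, 0.75, 0.125, 0.625.
print([vdc(n, 2) for n in range(1, 6)])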
def halton2(dim, nsims):
"""
    The function no longer crashes. Version 2 of the Halton sequence,
    implemented without relying on an existing Python library.
"""
h = np.empty(nsims * dim)
h.fill(np.nan)
p = np.empty(nsims)
p.fill(np.nan)
Base = [2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31]
lognsims = log(nsims + 1)
for i in range(dim):
base = Base[i]
n = int(ceil(lognsims / log(base)))
for t in range(n):
p[t] = pow(base, -(t + 1) )
for j in range(nsims):
d = j + 1
somme = fmod(d, base) * p[0]
for t in range(1, n):
d = floor(d / base)
somme += fmod(d, base) * p[t]
h[j*dim + i] = somme
return norm.ppf(h.reshape(dim, nsims))
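# Example draw: a 2 x 1000 block of quasi-random standard normal variates built
# from the first two Halton bases (2 and 3).
z = halton2(2, 1000)
print(z.shape)  # (2, 1000)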
def stratified_sampling1(M,nsims):
array=np.empty(nsims)
array=lh.sample(M,nsims)
array=norm.ppf(array)
return array.reshape(M,nsims)
def stratified_sampling2(nnoises,nsims):
"""On divise l'intervalle [0,1] en plusieurs stratas, m stratas"""
noises = np.empty((nnoises,nsims))
m=int(nnoises/nsims)
i=0
"""Pour chaque strata, on prend l échantillons suivant la loi uniforme
tel que nsims=m*l et enfin on prend le pdf du cdf."""
for i in range(m+1,1):
noises[i,:]=norm.ppf(np.random.uniform(i/m,(i+1)/m,nsims))
return noises
def stratified_sampling4(M,nsims):
L=nsims/M
i=0
noises=np.empty((M,nsims))
for i in range(M):
noises[i,:]=norm.ppf(np.random.uniform(i/M,(i+1)/M,nsims))
return noises
def stratified_samplingF(dim,nsims):
"Latin Hypercube Sample, une forme efficient de stratification à plusieurs dimensions"
lhd=np.empty((dim,nsims))
lhd = lhs(dim, samples=nsims)
lhd=norm.ppf(lhd)
lhd=np.transpose(lhd)
return lhd
def __StandardQTFun__(data):
data0 = data.reset_index(level=0, drop=True)
NotNAN = pd.notnull(data0)
quantile = data0.rank(method='min') / NotNAN.sum()
quantile.loc[quantile[data.columns[0]]==1, :] = 1 - 10 ** (-6)
data_after_standard = norm.ppf(quantile)
return pd.DataFrame(data_after_standard, index=data.index, columns=data.columns)
def estimateMonteCarloSampleCount(self, TOL):
theta = self._calcTheta(TOL, self.bias)
V = self.Vl_estimate[self.last_itr.lvls_find([])]
if np.isnan(V):
return np.nan
Ca = norm.ppf(self.params.confidence)
return np.maximum(np.reshape(self.params.M0, (1,))[-1],
int(np.ceil((theta * TOL / Ca)**-2 * V)))
################## Bayesian specific functions