import numpy as np
from scipy.special import betainc


def compHistDistance(h1, h2):
    def normalize(h):
        # Scale the histogram to unit mass, guarding against an all-zero input.
        if np.sum(h) == 0:
            return h
        else:
            return h / np.sum(h)

    def smoothstep(x, x_min=0., x_max=1., k=2.):
        # Smooth 0-to-1 ramp on [x_min, x_max] built from the regularized
        # incomplete beta function.
        m = 1. / (x_max - x_min)
        b = -m * x_min
        x = m * x + b
        return betainc(k, k, np.clip(x, 0., 1.))

    def fn(X, Y, k):
        return 4. * (1. - smoothstep(Y, 0, (1 - Y) * X + Y + .1)) \
            * np.sqrt(2 * X) * smoothstep(X, 0., 1. / k, 2) \
            + 2. * smoothstep(Y, 0, (1 - Y) * X + Y + .1) \
            * (1. - 2. * np.sqrt(2 * X) * smoothstep(X, 0., 1. / k, 2) - 0.5)

    h1 = normalize(h1)
    h2 = normalize(h2)
    return max(0, np.sum(fn(h2, h1, len(h1))))
    # return np.sum(np.where(h2 != 0, h2 * np.log10(h2 / (h1 + 1e-10)), 0))  # KL divergence
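# A minimal usage sketch for compHistDistance; the two histograms below are
# hypothetical, and the function only assumes equal-length, non-negative arrays.
h_a = np.array([10., 5., 0., 3.])
h_b = np.array([8., 6., 1., 2.])
print(compHistDistance(h_a, h_b))  # non-negative distance between the two histograms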
from scipy.special import betainc


def my_t1cdf(x):
    '''
    Cumulative distribution function of a t-distribution with 1 degree of freedom.

    function: p = my_t1cdf(x)
    input:  x = point
    output: p = cumulative probability

    see also: tcdf
    '''
    xsq = x * x
    # Lower-tail probability from the regularized incomplete beta function,
    # then reflect for x > 0.
    p = betainc(0.5, 0.5, 1. / (1. + xsq)) / 2
    p[x > 0] = 1 - p[x > 0]
    return p
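# Quick sanity check: with 1 degree of freedom the t-distribution is the
# standard Cauchy, whose CDF has the closed form 0.5 + arctan(x)/pi.
# The test points below are arbitrary.
import numpy as np
xs = np.array([-3., -0.5, 0.0, 0.5, 3.])
print(np.allclose(my_t1cdf(xs), 0.5 + np.arctan(xs) / np.pi))  # True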
def cdf(self, x):
    """Evaluates the CDF along the values ``x``."""
    y = .5 * betainc(self.m / 2., .5, np.sin(np.pi * x) ** 2)
    return np.where(x < .5, y, 1 - y)
def MakeCdf(self, steps=101):
    """Returns the CDF of this distribution."""
    xs = [i / (steps - 1.0) for i in range(steps)]
    ps = [special.betainc(self.alpha, self.beta, x) for x in xs]
    cdf = Cdf(xs, ps)
    return cdf
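# special.betainc(alpha, beta, x) is the regularized incomplete beta function,
# i.e. exactly the CDF of a Beta(alpha, beta) distribution, so the values
# computed above can be cross-checked against scipy.stats (parameters arbitrary):
import numpy as np
from scipy import special, stats
alpha, beta_, x = 2.0, 5.0, np.linspace(0., 1., 11)
print(np.allclose(special.betainc(alpha, beta_, x),
                  stats.beta.cdf(x, alpha, beta_)))  # True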
def ttest_1samp(population_mean, a, axis=0):
    """
    Calculates the T-test for the mean of ONE group of scores.

    Parameters
    ----------
    a : array_like
        Sample observation.
    population_mean : float or array_like
        Expected value in the null hypothesis; if array_like, it must have
        the same shape as `a` excluding the axis dimension.
    axis : int or None, optional
        Axis along which to compute the test. If None, compute over the whole
        array `a`.

    Returns
    -------
    statistic : float or array
        t-statistic
    pvalue : float or array
        two-tailed p-value

    Notes
    -----
    For more details on `ttest_1samp`, see `stats.ttest_1samp`.
    """
    a, axis = _chk_asarray(a, axis)
    if a.size == 0:
        return 'Empty Array'
    sample_mean = a.mean(axis=axis)
    sample_var = a.var(axis=axis, ddof=1)
    sample_count = a.count(axis=axis)
    # force df to be an array for masked division not to throw a warning
    df = ma.asanyarray(sample_count - 1.0)
    svar = ((sample_count - 1.0) * sample_var) / df
    with np.errstate(divide='ignore', invalid='ignore'):
        t = (sample_mean - population_mean) / ma.sqrt(svar / sample_count)
    # Two-sided p-value from the regularized incomplete beta function.
    prob = special.betainc(0.5 * df, 0.5, df / (df + t * t))
    return Ttest_1sampResult(t, prob)
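# The p-value line above uses the identity P(|T_df| > |t|) =
# betainc(df/2, 1/2, df/(df + t*t)). A standalone check against scipy.stats
# (arbitrary data; this bypasses the masked-array helpers used by the function):
import numpy as np
from scipy import special, stats
rng = np.random.default_rng(0)
sample = rng.normal(loc=0.3, size=50)
t, p = stats.ttest_1samp(sample, popmean=0.0)
df = sample.size - 1
print(np.isclose(p, special.betainc(0.5 * df, 0.5, df / (df + t * t))))  # True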
def MakeCdf(self, steps=101):
    """Returns the CDF of this distribution."""
    xs = [i / (steps - 1.0) for i in range(steps)]
    ps = special.betainc(self.alpha, self.beta, xs)
    cdf = Cdf(xs, ps)
    return cdf
def log_sf(self, x):
    scalar = not isinstance(x, np.ndarray)
    x = np.atleast_1d(x)
    # Suppress divide-by-zero warnings from log(0) where the survival probability is 0.
    errs = np.seterr(divide='ignore')
    ret = np.log(special.betainc(x + 1, self.r, self.p))
    np.seterr(**errs)
    ret[x < 0] = np.log(1.)
    if scalar:
        return ret[0]
    else:
        return ret
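# betainc(x + 1, r, p) above appears to be the survival function P(X > x) of a
# negative binomial distribution. Assuming p here is the failure probability
# (scipy.stats.nbinom is parametrized by the success probability 1 - p), a
# cross-check with arbitrary values:
import numpy as np
from scipy import special, stats
x, r, p = np.arange(6), 3.0, 0.4
print(np.allclose(special.betainc(x + 1, r, p),
                  stats.nbinom.sf(x, r, 1 - p)))  # True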
def resid_anscombe(self, endog, mu):
    '''
    The Anscombe residuals

    Parameters
    ----------
    endog : array-like
        Endogenous response variable
    mu : array-like
        Fitted mean response variable

    Returns
    -------
    resid_anscombe : array
        The Anscombe residuals as defined below.

    Notes
    -----
    sqrt(n)*(cox_snell(endog) - cox_snell(mu)) / (mu**(1/6.) * (1 - mu)**(1/6.))

    where cox_snell is defined as

    cox_snell(x) = betainc(2/3., 2/3., x) * beta(2/3., 2/3.)

    where betainc is the regularized incomplete beta function and beta is the
    complete beta function.

    The name 'cox_snell' is idiosyncratic and is simply used for
    convenience following the approach suggested in Cox and Snell (1968).
    Further note that

    cox_snell(x) = x**(2/3.) / (2/3.) * hyp2f1(2/3., 1/3., 5/3., x)

    where hyp2f1 is the hypergeometric 2f1 function. The Anscombe
    residuals are sometimes defined in the literature using the
    hyp2f1 formulation. Both betainc and hyp2f1 can be found in scipy.

    References
    ----------
    Anscombe, FJ. (1953) "Contribution to the discussion of H. Hotelling's
        paper." Journal of the Royal Statistical Society B. 15, 229-30.

    Cox, DR and Snell, EJ. (1968) "A General Definition of Residuals."
        Journal of the Royal Statistical Society B. 30, 248-75.
    '''
    cox_snell = lambda x: (special.betainc(2/3., 2/3., x)
                           * special.beta(2/3., 2/3.))
    return np.sqrt(self.n) * ((cox_snell(endog) - cox_snell(mu)) /
                              (mu**(1/6.) * (1 - mu)**(1/6.)))
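# The two cox_snell formulations in the docstring agree; a quick check with
# arbitrary values in (0, 1):
import numpy as np
from scipy import special
x = np.linspace(0.05, 0.95, 10)
via_betainc = special.betainc(2/3., 2/3., x) * special.beta(2/3., 2/3.)
via_hyp2f1 = x**(2/3.) / (2/3.) * special.hyp2f1(2/3., 1/3., 5/3., x)
print(np.allclose(via_betainc, via_hyp2f1))  # True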
def ttest_ind(a, b, axis=0, equal_var=True):
    """
    Calculates the T-test for the means of two independent samples of scores.

    Parameters
    ----------
    a, b : array_like
        The arrays must have the same shape, except in the dimension
        corresponding to `axis` (the first, by default).
    axis : int or None, optional
        Axis along which to compute the test. If None, compute over the whole
        arrays, `a` and `b`.
    equal_var : bool, optional
        If True, perform a standard independent two-sample test that assumes
        equal population variances.
        If False, perform Welch's t-test, which does not assume equal
        population variance.

        .. versionadded:: 0.17.0

    Returns
    -------
    statistic : float or array
        The calculated t-statistic.
    pvalue : float or array
        The two-tailed p-value.

    Notes
    -----
    For more details on `ttest_ind`, see `stats.ttest_ind`.
    """
    a, b, axis = _chk2_asarray(a, b, axis)
    if a.size == 0 or b.size == 0:
        return 'One of the vectors is empty'
    (mean1, mean2) = (a.mean(axis), b.mean(axis))
    (var1, var2) = (a.var(axis=axis, ddof=1), b.var(axis=axis, ddof=1))
    (n1, n2) = (a.count(axis), b.count(axis))
    if equal_var:
        # force df to be an array for masked division not to throw a warning
        df = ma.asanyarray(n1 + n2 - 2.0)
        svar = ((n1 - 1) * var1 + (n2 - 1) * var2) / df
        denom = ma.sqrt(svar * (1.0 / n1 + 1.0 / n2))  # n-D computation here!
    else:
        vn1 = var1 / n1
        vn2 = var2 / n2
        with np.errstate(divide='ignore', invalid='ignore'):
            df = (vn1 + vn2) ** 2 / (vn1 ** 2 / (n1 - 1) + vn2 ** 2 / (n2 - 1))
        # If df is undefined, variances are zero.
        # It doesn't matter what df is as long as it is not NaN.
        df = np.where(np.isnan(df), 1, df)
        denom = ma.sqrt(vn1 + vn2)
    with np.errstate(divide='ignore', invalid='ignore'):
        t = (mean1 - mean2) / denom
    # Two-sided p-value from the regularized incomplete beta function.
    probs = special.betainc(0.5 * df, 0.5, df / (df + t * t)).reshape(t.shape)
    return Ttest_ind(t, probs.squeeze())
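# The same betainc identity drives the two-sample p-value. A standalone check
# against scipy.stats.ttest_ind with equal_var=True (arbitrary data; this
# bypasses the masked-array helpers used by the function above):
import numpy as np
from scipy import special, stats
rng = np.random.default_rng(1)
a = rng.normal(0.0, 1.0, size=40)
b = rng.normal(0.5, 1.0, size=35)
t, p = stats.ttest_ind(a, b, equal_var=True)
df = a.size + b.size - 2
print(np.isclose(p, special.betainc(0.5 * df, 0.5, df / (df + t * t))))  # True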