def embed(self, G, rng):
if G.order() < self.cycle_length:
return np.array([0.0], dtype = "f")
succ = 0
max_cycle = sp.binom(G.order(), self.cycle_length)
for _ in range(self.sample_size):
us = rng.choice(G.nodes(), self.cycle_length)
H = G.subgraph(us.tolist())
for c in nx.simple_cycles(H):
if len(c) == self.cycle_length:
succ += 1
break
val = [max_cycle * (succ / self.sample_size)]
return np.array(val, dtype = "f")
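A minimal standalone sketch of the same Monte Carlo idea (illustrative, not part of the original class): sample node subsets, check whether the induced subgraph contains a cycle of exactly the target length, and scale the hit rate by C(n, cycle_length). It assumes networkx >= 3.1 (where nx.simple_cycles also accepts undirected graphs) and samples without replacement, unlike the snippet above.

import networkx as nx
import numpy as np
from scipy.special import binom

def estimate_cycle_count(G, cycle_length, sample_size, rng):
    # Fraction of sampled node subsets that induce a cycle of the target
    # length, scaled by the number of possible subsets C(n, cycle_length).
    if G.order() < cycle_length:
        return 0.0
    nodes = list(G.nodes())
    hits = 0
    for _ in range(sample_size):
        us = rng.choice(nodes, cycle_length, replace=False)
        H = G.subgraph(us.tolist())
        if any(len(c) == cycle_length for c in nx.simple_cycles(H)):
            hits += 1
    return float(binom(G.order(), cycle_length)) * hits / sample_size

rng = np.random.default_rng(0)
# K6 has C(6, 3) = 20 triangles, and every 3-node induced subgraph is one.
print(estimate_cycle_count(nx.complete_graph(6), 3, 200, rng))  # 20.0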
def embed(self, G, rng):
if G.order() < self.cycle_length:
return np.array([0.0], dtype = "f")
succ, samples = 0, 0
max_cycle = sp.binom(G.order(), self.cycle_length)
for _ in range(self.sample_cap):
us = rng.choice(G.nodes(), self.cycle_length)
H = G.subgraph(us.tolist())
samples += 1
for c in nx.simple_cycles(H):
if len(c) == self.cycle_length:
succ += 1
break
if succ >= self.successes:
break
return np.array([max_cycle * (succ / samples)], dtype = "f")
def test_beta_binomial_two_identical_models(db_path, sampler):
binomial_n = 5
def model_fun(args):
return {"result": st.binom(binomial_n, args.theta).rvs()}
models = [model_fun for _ in range(2)]
models = list(map(SimpleModel, models))
population_size = ConstantPopulationSize(800)
parameter_given_model_prior_distribution = [Distribution(theta=st.beta(
1, 1))
for _ in range(2)]
abc = ABCSMC(models, parameter_given_model_prior_distribution,
MinMaxDistanceFunction(measures_to_use=["result"]),
population_size,
eps=MedianEpsilon(.1),
sampler=sampler)
abc.new(db_path, {"result": 2})
minimum_epsilon = .2
history = abc.run(minimum_epsilon, max_nr_populations=3)
mp = history.get_model_probabilities(history.max_t)
assert abs(mp.p[0] - .5) + abs(mp.p[1] - .5) < .08
def test_beta_binomial_two_identical_models_adaptive(db_path, sampler):
binomial_n = 5
def model_fun(args):
return {"result": st.binom(binomial_n, args.theta).rvs()}
models = [model_fun for _ in range(2)]
models = list(map(SimpleModel, models))
population_size = AdaptivePopulationSize(800)
parameter_given_model_prior_distribution = [
Distribution(theta=st.beta(1, 1)) for _ in range(2)]
abc = ABCSMC(models, parameter_given_model_prior_distribution,
MinMaxDistanceFunction(measures_to_use=["result"]),
population_size,
eps=MedianEpsilon(.1),
sampler=sampler)
abc.new(db_path, {"result": 2})
minimum_epsilon = .2
history = abc.run(minimum_epsilon, max_nr_populations=3)
mp = history.get_model_probabilities(history.max_t)
assert abs(mp.p[0] - .5) + abs(mp.p[1] - .5) < .08
heterogeneous_solver.py (project: python-traffic-assignment, author: megacell)
def shift_polynomial(coef, d):
# given polynomial sum_k a_k x^k -> sum_k a_k (x+d)^k
coef2 = np.zeros(5, dtype="float64")
for i in range(5):
for j in range(i, 5):
coef2[i] = coef2[i] + coef[j] * (d**(j - i)) * sp.binom(j, i)
return coef2
# def shift_graph_2(graph1, graph2, d):
# # numpy matrix implementation of shift_graph
# A = np.zeros((5,5),dtype="float64")
# for i in range(5):
# for j in range(i,5):
# A[j,i] = (d**(j-i)) * sp.binom(j,i)
# graph2[:,3:] = graph2[:,3:].dot(A)
# return graph2
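A quick sanity check of shift_polynomial (illustrative usage of the function above, assuming sp is scipy.special as in that file): shifting the coefficients of p(x) = 1 + 2x + 3x^2 by d and evaluating at x should match evaluating p directly at x + d.

import numpy as np
from scipy import special as sp  # alias assumed by shift_polynomial above

coef = np.array([1.0, 2.0, 3.0, 0.0, 0.0])  # lowest degree first, length 5
d, x = 1.5, 0.7
shifted = shift_polynomial(coef, d)
direct = sum(c * (x + d)**k for k, c in enumerate(coef))
via_shift = sum(c * x**k for k, c in enumerate(shifted))
print(np.isclose(direct, via_shift))  # True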
fscore_noodling.py (project: instacart-basket-prediction, author: colinmorris)
def expected_fscore(n_predicted, n=100, prob_per_instance=.01):
#assert n_predicted in (1, n)
ppi = prob_per_instance
ppibar = (1 - prob_per_instance)
def fscore(fn, tp, fp):
num = 2*tp
denom = 2*tp + fp + fn
return num / denom
parts = []
for ntrue in range(1, n+1):
# ntrue := this many were actually true in the gold standard
if n_predicted == 1:
tp = 1
fn = ntrue - 1
fp = 0
# Prob. of our 1 predicted label being true
prob_weight = ppi
# Prob of ntrue-1 other labels being true out of n-1
prob_weight *= binom(n-1, ntrue-1) * ppi**(ntrue-1) * ppibar**(n-ntrue)
elif n_predicted == n:
tp = ntrue
fn = 0
fp = n - ntrue
prob_weight = binom(n, ntrue) * ppi**ntrue * ppibar**(n-ntrue)
else:
            assert False, "only n_predicted in (1, n) is supported"
for tp in range(1, ntrue):
fp = n_predicted - tp
fn = ntrue - tp
fs = fscore(fn, tp, fp)
dbg('fscore = {}'.format(fs))
dbg('prob weight = {}'.format(prob_weight))
res = fs * prob_weight
parts.append(res)
return parts
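The prob_weight expressions above are binomial pmfs written out with binom; a quick cross-check against scipy.stats (illustrative only):

import numpy as np
from scipy.special import binom
from scipy import stats

n, k, p = 100, 3, 0.01
explicit = binom(n, k) * p**k * (1 - p)**(n - k)
print(np.isclose(explicit, stats.binom.pmf(k, n, p)))  # True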
def calculate_initial_centroids_first_run(
baskets, neighbors, no_neighbors, item_baskets, use_neighbors, num_rnd_init, verbose=False):
max_no_nbr = list()
max_no_nbr_index = 0
if use_neighbors:
for b in baskets:
no_nbr_list = no_neighbors[b]
no_nbr_tuple = (b, no_nbr_list)
no_nbr = no_nbr_list.count()
if no_nbr > max_no_nbr_index:
max_no_nbr_index = no_nbr
max_no_nbr = list()
max_no_nbr.append(no_nbr_tuple)
elif no_nbr == max_no_nbr_index:
max_no_nbr.append(no_nbr_tuple)
if max_no_nbr_index == 0 or len(max_no_nbr) == len(baskets):
# print 'rnd'
if verbose:
            print(datetime.datetime.now(), 'init random')
num_rnd_init = min(num_rnd_init, int(binom(len(baskets), 2)))
m0_index, m1_index = random_init(baskets, neighbors, use_neighbors, num=num_rnd_init)
else:
# print 'no nbr'
if verbose:
            print(datetime.datetime.now(), 'init no neighbors')
m0_index, m1_index = no_nbr_init(max_no_nbr, baskets, neighbors, use_neighbors)
return m0_index, m1_index
def size(N, nb):
"""
Return the size of the boson many-body basis.
Parameters
----------
N : int
Number of sites
nb : int
Number of bosons
"""
return int(binom(nb + N - 1, nb))
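A brute-force check of the stars-and-bars count (illustrative): for N = 3 sites and nb = 2 bosons the basis should have C(nb + N - 1, nb) = C(4, 2) = 6 states.

from itertools import product
from scipy.special import binom

N, nb = 3, 2
states = [s for s in product(range(nb + 1), repeat=N) if sum(s) == nb]
print(len(states), int(binom(nb + N - 1, nb)))  # 6 6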
def gen_probs(n, a, b):
probs = np.zeros(n+1)
for k in range(n+1):
probs[k] = binom(n, k) * beta(k + a, n - k + b) / beta(a, b)
return probs
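gen_probs builds the beta-binomial pmf, so the returned probabilities should sum to 1. A quick check (illustrative; assumes the imports gen_probs relies on, i.e. numpy as np and scipy.special's binom and beta):

import numpy as np
from scipy.special import binom, beta

print(np.isclose(gen_probs(10, 2.0, 3.0).sum(), 1.0))  # True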
def genton(X):
"""
Return the Genton Variogram of the given sample X.
X has to be an even-length array of point pairs like: x1, x1+h, x2, x2+h ...., xn, xn + h.
    If X.ndim > 1, genton will be called recursively and a list of Genton variances is returned.
Genton, M. G., (1998): Highly robust variogram estimation, Math. Geol., 30, 213 - 221.
:param X:
:return:
"""
_X = np.array(X)
if any([isinstance(_, list) or isinstance(_, np.ndarray) for _ in _X]):
return np.array([genton(_) for _ in _X])
# check even
if len(_X) % 2 > 0:
raise ValueError('The sample does not have an even length: {}'.format(_X))
# calculate
try:
        y = [np.abs((_X[i] - _X[i + 1]) - (_X[j] - _X[j + 1])) for i in np.arange(0, len(_X), 2) for j in np.arange(0, len(_X), 2) if i < j]
        # k is binom(N(x)/2 + 1, 2)
        k = binom(int(len(_X) / 2 + 1), 2)
# return the kth percentile
return 0.5 * np.power(2.219 * np.percentile(y, k), 2)
except ZeroDivisionError:
return np.nan
def binomial(n):
'''
    Return all binomial coefficients for a given order.
    For n > 5, scipy.special.binom is used, below we hardcode
    to avoid the scipy.special dependency.
'''
if n == 1: return [1,1]
elif n == 2: return [1,2,1]
elif n == 3: return [1,3,3,1]
elif n == 4: return [1,4,6,4,1]
elif n == 5: return [1,5,10,10,5,1]
else:
from scipy.special import binom
return binom(n,np.arange(n+1))
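The hardcoded rows can be checked against scipy directly (illustrative usage of the function above):

import numpy as np
from scipy.special import binom

print(np.allclose(binomial(5), binom(5, np.arange(6))))  # True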
def _F_mk(Q_p, P, n_p, J, r_b, r_p, z, pikg, sigma):
"""
Complex matrix F_mk from Claesson and Hellstrom (2011), EQ. 34
"""
F = np.zeros((n_p, J), dtype=np.cfloat)
for m in range(n_p):
for k in range(J):
fmk = 0. + 0.j
for n in range(n_p):
# First term
if m != n:
fmk += Q_p[n]*pikg/(k+1)*(r_p[m]/(z[n] - z[m]))**(k+1)
# Second term
fmk += sigma*Q_p[n]*pikg/(k+1)*(r_p[m]*np.conj(z[n])/(
r_b**2 - z[m]*np.conj(z[n])))**(k+1)
for j in range(J):
# Third term
if m != n:
fmk += P[n,j]*binom(j+k+1, j) \
*r_p[n]**(j+1)*(-r_p[m])**(k+1) \
/(z[m] - z[n])**(j+k+2)
# Fourth term
j_pend = np.min((k, j)) + 1
for jp in range(j_pend):
fmk += sigma*np.conj(P[n,j])*binom(j+1, jp) \
*binom(j+k-jp+1, j)*r_p[n]**(j+1) \
*r_p[m]**(k+1)*z[m]**(j+1-jp) \
*np.conj(z[n])**(k+1-jp) \
/(r_b**2 - z[m]*np.conj(z[n]))**(k+j+2-jp)
F[m,k] = fmk
return F
def readout(q, r):
"""C matrix to decode a delay of r*theta from the delay state for theta.
``r`` is a ratio between 0 (``t=0``) and 1 (``t=-theta``).
"""
c = np.zeros(q)
for i in range(q):
j = np.arange(i+1, dtype=np.float64)
c[q-1-i] += 1 / binom(q, i) * np.sum(
binom(q, j) * binom(2*q - 1 - j, i - j) * (-r)**(i - j))
return c
def test_beta_binomial_different_priors(db_path, sampler):
binomial_n = 5
def model(args):
return {"result": st.binom(binomial_n, args['theta']).rvs()}
models = [model for _ in range(2)]
models = list(map(SimpleModel, models))
population_size = ConstantPopulationSize(800)
a1, b1 = 1, 1
a2, b2 = 10, 1
parameter_given_model_prior_distribution = [Distribution(theta=RV("beta",
a1, b1)),
Distribution(theta=RV("beta",
a2, b2))]
abc = ABCSMC(models, parameter_given_model_prior_distribution,
MinMaxDistanceFunction(measures_to_use=["result"]),
population_size,
eps=MedianEpsilon(.1),
sampler=sampler)
n1 = 2
abc.new(db_path, {"result": n1})
minimum_epsilon = .2
history = abc.run(minimum_epsilon, max_nr_populations=3)
mp = history.get_model_probabilities(history.max_t)
def B(a, b):
return gamma(a) * gamma(b) / gamma(a + b)
def expected_p(a, b, n1):
return binom(binomial_n, n1) * B(a + n1, b + binomial_n - n1) / B(a, b)
p1_expected_unnormalized = expected_p(a1, b1, n1)
p2_expected_unnormalized = expected_p(a2, b2, n1)
p1_expected = p1_expected_unnormalized / (p1_expected_unnormalized +
p2_expected_unnormalized)
p2_expected = p2_expected_unnormalized / (p1_expected_unnormalized +
p2_expected_unnormalized)
assert abs(mp.p[0] - p1_expected) + abs(mp.p[1] - p2_expected) < .08
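For reference, the analytic expectation used in this test works out to roughly 0.90 / 0.10 for n1 = 2. A hedged side calculation using the same beta-binomial marginal likelihoods (not part of the original test):

from scipy.special import gamma, binom

def B(a, b):
    return gamma(a) * gamma(b) / gamma(a + b)

def marginal(a, b, n1, n=5):
    # beta-binomial marginal likelihood of n1 successes out of n under Beta(a, b)
    return binom(n, n1) * B(a + n1, b + n - n1) / B(a, b)

m1, m2 = marginal(1, 1, 2), marginal(10, 1, 2)
print(m1 / (m1 + m2), m2 / (m1 + m2))  # approximately 0.901 and 0.099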
def _check_subparams(self, n_samples, n_features):
n_subsamples = self.n_subsamples
if self.fit_intercept:
n_dim = n_features + 1
else:
n_dim = n_features
if n_subsamples is not None:
if n_subsamples > n_samples:
raise ValueError("Invalid parameter since n_subsamples > "
"n_samples ({0} > {1}).".format(n_subsamples,
n_samples))
if n_samples >= n_features:
if n_dim > n_subsamples:
plus_1 = "+1" if self.fit_intercept else ""
raise ValueError("Invalid parameter since n_features{0} "
"> n_subsamples ({1} > {2})."
"".format(plus_1, n_dim, n_samples))
else: # if n_samples < n_features
if n_subsamples != n_samples:
raise ValueError("Invalid parameter since n_subsamples != "
"n_samples ({0} != {1}) while n_samples "
"< n_features.".format(n_subsamples,
n_samples))
else:
n_subsamples = min(n_dim, n_samples)
if self.max_subpopulation <= 0:
raise ValueError("Subpopulation must be strictly positive "
"({0} <= 0).".format(self.max_subpopulation))
all_combinations = max(1, np.rint(binom(n_samples, n_subsamples)))
n_subpopulation = int(min(self.max_subpopulation, all_combinations))
return n_subsamples, n_subpopulation
def point_probability(k, n, l, p=0.5):
return binom(n - l, k) * p ** k * (1 - p) ** (n - k - l)
distribution_util_test.py (project: DeepLearning_VirtualReality_BigData_Project, author: rashmitripathi)
def testLogCombinationsBinomial(self):
n = [2, 5, 12, 15]
k = [1, 2, 4, 11]
log_combs = np.log(special.binom(n, k))
with self.test_session():
n = np.array(n, dtype=np.float32)
counts = [[1., 1], [2., 3], [4., 8], [11, 4]]
log_binom = distribution_util.log_combinations(n, counts)
self.assertEqual([4], log_binom.get_shape())
self.assertAllClose(log_combs, log_binom.eval())
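The assertion relies on the two-category multinomial coefficient reducing to a binomial coefficient; the same identity via log-gamma (illustrative only):

import numpy as np
from scipy import special

n = np.array([2., 5., 12., 15.])
k = np.array([1., 2., 4., 11.])
log_combs = special.gammaln(n + 1) - special.gammaln(k + 1) - special.gammaln(n - k + 1)
print(np.allclose(log_combs, np.log(special.binom(n, k))))  # True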
def sample(self, N=100):
"""Compute N points of the Bezier curve. Returns two lists, containing the x and y coordinates."""
def p(t, coord):
n = len(coord)
res = 0
for k in range(n):
res += coord[k] * binom(n - 1, k) * t ** k * (1 - t) ** (n - 1 - k)
return (res)
x = [p(t / N, self.controlPoints_x) for t in range(N)]
y = [p(t / N, self.controlPoints_y) for t in range(N)]
return x, y
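The weights in p are the Bernstein basis polynomials, which sum to 1 for any t, so each sampled point is an affine combination of the control points. A quick check (illustrative):

import numpy as np
from scipy.special import binom

n, t = 4, 0.3
weights = [binom(n - 1, k) * t**k * (1 - t)**(n - 1 - k) for k in range(n)]
print(np.isclose(sum(weights), 1.0))  # True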
def estimation(self, y, ds=None):
""" Estimate the fourth multivariate extension of Spearman's rho.
Parameters
----------
y : (number of samples, dimension)-ndarray
One row of y corresponds to one sample.
ds : int vector, vector of ones
ds[i] = 1 (for all i): the i^th subspace is one-dimensional.
If ds is not given (ds=None), the vector of ones [ds =
ones(y.shape[1],dtype='int')] is emulated inside the function.
Returns
-------
a : float
Estimated fourth multivariate extension of Spearman's rho.
References
----------
    Friedrich Schmid, Rafael Schmidt, Thomas Blumentritt, Sandra
Gaiser, and Martin Ruppert. Copula Theory and Its Applications,
Chapter Copula based Measures of Multivariate Association. Lecture
Notes in Statistics. Springer, 2010.
Friedrich Schmid and Rafael Schmidt. Multivariate extensions of
Spearman's rho and related statistics. Statistics & Probability
Letters, 77:407-416, 2007.
Maurice G. Kendall. Rank correlation methods. London, Griffin,
1970.
C. Spearman. The proof and measurement of association between two
things. The American Journal of Psychology, 15:72-101, 1904.
Examples
--------
a = co.estimation(y,ds)
"""
if ds is None: # emulate 'ds = vector of ones'
ds = ones(y.shape[1], dtype='int')
# verification:
self.verification_compatible_subspace_dimensions(y, ds)
self.verification_one_dimensional_subspaces(ds)
num_of_samples, dim = y.shape # number of samples, dimension
u = copula_transformation(y)
m_triu = triu(ones((dim, dim)), 1) # upper triangular mask
b = binom(dim, 2)
a = 12 * sum(dot((1 - u).T, (1 - u)) * m_triu) /\
(b * num_of_samples) - 3
return a
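As a sanity check on the closing formula: for independent uniform components E[(1 - U_i)(1 - U_j)] = 1/4, so the estimator should be close to zero. A small Monte Carlo illustration of the same expression (standalone, not using the class):

import numpy as np
from scipy.special import binom

rng = np.random.default_rng(0)
u = rng.uniform(size=(100000, 3))   # plays the role of copula-transformed data
num_of_samples, dim = u.shape
m_triu = np.triu(np.ones((dim, dim)), 1)
rho = 12 * np.sum((1 - u).T @ (1 - u) * m_triu) / (binom(dim, 2) * num_of_samples) - 3
print(abs(rho) < 0.05)  # True: close to zero for independent components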
def test_beta_binomial_different_priors_initial_epsilon_from_sample(db_path,
sampler):
binomial_n = 5
def model(args):
return {"result": st.binom(binomial_n, args.theta).rvs()}
models = [model for _ in range(2)]
models = list(map(SimpleModel, models))
population_size = ConstantPopulationSize(800)
a1, b1 = 1, 1
a2, b2 = 10, 1
parameter_given_model_prior_distribution = [Distribution(theta=RV("beta",
a1, b1)),
Distribution(theta=RV("beta",
a2, b2))]
abc = ABCSMC(models, parameter_given_model_prior_distribution,
MinMaxDistanceFunction(measures_to_use=["result"]),
population_size,
eps=MedianEpsilon(median_multiplier=.9),
sampler=sampler)
n1 = 2
abc.new(db_path, {"result": n1})
minimum_epsilon = -1
history = abc.run(minimum_epsilon, max_nr_populations=5)
mp = history.get_model_probabilities(history.max_t)
def B(a, b):
return gamma(a) * gamma(b) / gamma(a + b)
def expected_p(a, b, n1):
return binom(binomial_n, n1) * B(a + n1, b + binomial_n - n1) / B(a, b)
p1_expected_unnormalized = expected_p(a1, b1, n1)
p2_expected_unnormalized = expected_p(a2, b2, n1)
p1_expected = p1_expected_unnormalized / (p1_expected_unnormalized +
p2_expected_unnormalized)
p2_expected = p2_expected_unnormalized / (p1_expected_unnormalized +
p2_expected_unnormalized)
assert abs(mp.p[0] - p1_expected) + abs(mp.p[1] - p2_expected) < .08
def fit(self, X, y):
"""Fit linear model.
Parameters
----------
X : numpy array of shape [n_samples, n_features]
Training data
y : numpy array of shape [n_samples]
Target values
Returns
-------
self : returns an instance of self.
"""
random_state = check_random_state(self.random_state)
X, y = check_X_y(X, y, y_numeric=True)
n_samples, n_features = X.shape
n_subsamples, self.n_subpopulation_ = self._check_subparams(n_samples,
n_features)
self.breakdown_ = _breakdown_point(n_samples, n_subsamples)
if self.verbose:
print("Breakdown point: {0}".format(self.breakdown_))
print("Number of samples: {0}".format(n_samples))
tol_outliers = int(self.breakdown_ * n_samples)
print("Tolerable outliers: {0}".format(tol_outliers))
print("Number of subpopulations: {0}".format(
self.n_subpopulation_))
# Determine indices of subpopulation
if np.rint(binom(n_samples, n_subsamples)) <= self.max_subpopulation:
indices = list(combinations(range(n_samples), n_subsamples))
else:
indices = [choice(n_samples,
size=n_subsamples,
replace=False,
random_state=random_state)
for _ in range(self.n_subpopulation_)]
n_jobs = _get_n_jobs(self.n_jobs)
index_list = np.array_split(indices, n_jobs)
weights = Parallel(n_jobs=n_jobs,
verbose=self.verbose)(
delayed(_lstsq)(X, y, index_list[job], self.fit_intercept)
for job in range(n_jobs))
weights = np.vstack(weights)
self.n_iter_, coefs = _spatial_median(weights,
max_iter=self.max_iter,
tol=self.tol)
if self.fit_intercept:
self.intercept_ = coefs[0]
self.coef_ = coefs[1:]
else:
self.intercept_ = 0.
self.coef_ = coefs
return self