def __init__(self, training_set, degree, debug=False):
    """
    :param training_set: pypuf.tools.TrainingSet
                         The training set generated by tools.TrainingSet
    :param degree: int
                   The degree up to which the Fourier coefficients are approximated
    :param debug: boolean
                  If true, a progress message with ETA will be periodically printed to stdout
    """
    self.training_set = training_set
    self.n = len(training_set.challenges[0])
    self.monomial_count = 0
    for k in range(degree + 1):
        self.monomial_count += ncr(self.n, k)
    self.degree = degree
    self.fourier_coefficients = []
    self.debug = debug
def get_training_set_size(n, degree, epsilon, delta):
    """
    Calculate the training set size needed to satisfy the theoretical requirements of the
    Low Degree Algorithm, so that compliance with the epsilon and delta parameters is guaranteed.
    :param n: int
              Input length
    :param degree: int
                   The degree up to which the Fourier coefficients are approximated
    :param epsilon: float
                    The maximum error rate of the model
    :param delta: float
                  The maximum failure rate of the algorithm, i.e. the probability that the epsilon bound is not met
    :return: int
             The required number of training examples
    """
    monomial_count = 0
    for k in range(degree + 1):
        monomial_count += ncr(n, k)
    return int(4 * monomial_count * np.log(2 * monomial_count / delta) / epsilon)
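A minimal usage sketch for the size bound above, assuming `ncr` is a binomial-coefficient helper such as `scipy.special.comb` and `np` is NumPy, as the snippet suggests:

import numpy as np
from scipy.special import comb as ncr  # assumed binding for ncr

# For 64 input bits, degree-2 approximation, 5% model error and 5% failure
# probability, the bound gives the number of challenge/response pairs to draw.
print(get_training_set_size(n=64, degree=2, epsilon=0.05, delta=0.05))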
def V_short(self, eta):
    sum0 = np.zeros(7, dtype=float)
    sum1 = np.zeros(7, dtype=float)
    for n1, n2 in product(range(self.N1 + 1), range(self.N2 + 1)):
        wdo = comb(self.N1, n1, exact=True) * comb(self.N2, n2, exact=True)
        wdox10 = comb(self.N1 - 1, n1, exact=True) * comb(self.N2, n2, exact=True)
        wdox11 = comb(self.N1 - 1, n1 - 1, exact=True) * comb(self.N2, n2, exact=True)
        wdox20 = comb(self.N1, n1, exact=True) * comb(self.N2 - 1, n2, exact=True)
        wdox21 = comb(self.N1, n1, exact=True) * comb(self.N2 - 1, n2 - 1, exact=True)
        w = np.asarray([wdox10, wdox20, wdox11, wdox21, wdo, wdo, wdo])
        pz0, pz1 = self.p_n_given_z(n1, n2)
        counts = [self.N1 - n1, self.N2 - n2, n1, n2, 1, 1, 1]
        Q = (eta * pz0 * counts * (1 - self.pZgivenA) + eta * pz1 * counts * self.pZgivenA).sum()
        ratio = np.nan_to_num(np.true_divide(pz0 * (1 - self.pZgivenA) + pz1 * self.pZgivenA, Q))
        sum0 += np.asfarray(w * pz0 * ratio)
        sum1 += np.asfarray(w * pz1 * ratio)
    result = self.pZgivenA * sum1 + (1 - self.pZgivenA) * sum0
    return result
def edit_distance_distribution(sentence_length, vocab_size, tau=1.0):
    # from https://gist.github.com/norouzi/8c4d244922fa052fa8ec18d8af52d366
    c = numpy.zeros(sentence_length)
    for edit_dist in xrange(sentence_length):
        n_edits = 0
        for n_substitutes in xrange(min(sentence_length, edit_dist) + 1):
            n_insert = edit_dist - n_substitutes
            current_edits = misc.comb(sentence_length, n_substitutes, exact=False) * \
                misc.comb(sentence_length + n_insert - n_substitutes, n_insert, exact=False)
            n_edits += current_edits
        c[edit_dist] = numpy.log(n_edits) + edit_dist * numpy.log(vocab_size)
        c[edit_dist] = c[edit_dist] - edit_dist / tau - edit_dist / tau * numpy.log(vocab_size)
    c = numpy.exp(c)
    c /= numpy.sum(c)
    return c
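A quick sanity check of the distribution above; this is a sketch only, assuming the snippet's Python 2 style (`xrange`, `scipy.misc as misc`). On Python 3 with a recent SciPy, one would substitute `range` and `scipy.special.comb`.

import numpy
from scipy import misc  # older SciPy; use scipy.special on newer versions

# Weights over edit distances 0..4 should be non-negative and sum to 1.
probs = edit_distance_distribution(sentence_length=5, vocab_size=100, tau=1.0)
assert abs(probs.sum() - 1.0) < 1e-9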
def get_n_splits(self, X, y=None, labels=None):
    """Returns the number of splitting iterations in the cross-validator
    Parameters
    ----------
    X : array-like, shape (n_samples, n_features)
        Training data, where n_samples is the number of samples
        and n_features is the number of features.
    y : object
        Always ignored, exists for compatibility.
    labels : object
        Always ignored, exists for compatibility.
    """
    if X is None:
        raise ValueError("The X parameter should not be None")
    return int(comb(_num_samples(X), self.p, exact=True))
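The return value is simply C(n_samples, p); a small illustration against scikit-learn's public `LeavePOut` (assuming a scikit-learn version where `get_n_splits` accepts `X` alone):

import numpy as np
from scipy.special import comb
from sklearn.model_selection import LeavePOut

X = np.zeros((6, 2))
lpo = LeavePOut(p=2)
# Leaving p=2 of 6 samples out yields C(6, 2) = 15 splits.
assert lpo.get_n_splits(X) == comb(6, 2, exact=True) == 15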
def get_n_splits(self, X, y, labels):
    """Returns the number of splitting iterations in the cross-validator
    Parameters
    ----------
    X : object
        Always ignored, exists for compatibility.
    y : object
        Always ignored, exists for compatibility.
    labels : array-like, with shape (n_samples,), optional
        Group labels for the samples used while splitting the dataset into
        train/test set.
    Returns
    -------
    n_splits : int
        Returns the number of splitting iterations in the cross-validator.
    """
    if labels is None:
        raise ValueError("The labels parameter should not be None")
    return int(comb(len(np.unique(labels)), self.n_labels, exact=True))
def _prob_no_occurrence(
    sample_size,
    subsample_size,
    sample_freq,
    num_subsamples
):
    """Compute the probability that a type with a given frequency does not
    occur in a subsample of given size drawn from a population of a given size.
    """
    if sample_freq > sample_size - subsample_size:
        return 0
    else:
        return binom(
            sample_size - sample_freq,
            subsample_size,
            exact=True
        ) / num_subsamples
def comb(n, k):
    # n choose k computed from factorials; note this returns a float under
    # true division -- use // for an exact integer result.
    return factorial(n) / (factorial(k) * factorial(n - k))
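A quick cross-check of the factorial-based helper against SciPy's exact binomial coefficient (assuming `factorial` here is `math.factorial`):

from math import factorial
from scipy.special import comb as scipy_comb

assert comb(10, 3) == scipy_comb(10, 3, exact=True) == 120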
def NumCoefficients(self):
    """
    There are (n + d) choose n coefficients, where n is the degree of the polynomial and d is the dimension.
    :return: the number of coefficients of the polynomial, given its degree and dimension
    """
    return nchoosek(self.degree + self.dimension, self.degree, exact=True)
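As a concrete check of the (n + d choose n) count: a degree-3 polynomial in 2 dimensions has C(5, 3) = 10 coefficients (1, x, y, x², xy, y², x³, x²y, xy², y³). A small sketch assuming `nchoosek` is `scipy.special.comb`:

from scipy.special import comb as nchoosek

assert nchoosek(3 + 2, 3, exact=True) == 10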
def combinations(seq, k):
    """ Return k length subsequences of elements from the input iterable.
    This version uses Numpy/Scipy and should be preferred over itertools. It avoids
    the creation of all intermediate Python objects.
    Examples
    --------
    >>> import numpy as np
    >>> from itertools import combinations as iter_comb
    >>> x = np.arange(3)
    >>> c1 = combinations(x, 2)
    >>> print(c1)  # doctest: +NORMALIZE_WHITESPACE
    [[0 1]
     [0 2]
     [1 2]]
    >>> c2 = np.array(tuple(iter_comb(x, 2)))
    >>> print(c2)  # doctest: +NORMALIZE_WHITESPACE
    [[0 1]
     [0 2]
     [1 2]]
    """
    from itertools import combinations as _combinations, chain
    from scipy.misc import comb
    count = comb(len(seq), k, exact=True)
    res = np.fromiter(chain.from_iterable(_combinations(seq, k)),
                      int, count=count * k)
    return res.reshape(-1, k)
def hamming_distance_distribution(sentence_length, vocab_size, tau=1.0):
    # based on https://gist.github.com/norouzi/8c4d244922fa052fa8ec18d8af52d366
    c = numpy.zeros(sentence_length)
    for edit_dist in xrange(sentence_length):
        n_edits = misc.comb(sentence_length, edit_dist)
        # reweight
        c[edit_dist] = numpy.log(n_edits) + edit_dist * numpy.log(vocab_size)
        c[edit_dist] = c[edit_dist] - edit_dist / tau - edit_dist / tau * numpy.log(vocab_size)
    c = numpy.exp(c)
    c /= numpy.sum(c)
    return c
test_lagrange_points.py (project: finite-element-course, author: finite-element)
def test_point_count(cell, degree):
    p = lagrange_points(cell, degree)
    assert p.shape[0] == np.round(comb(degree + cell.dim, cell.dim))


# Check that the average of the points is the circumcentre.
# This basic test follows by symmetry.
test_entity_nodes.py (project: finite-element-course, author: finite-element)
def test_nodes_per_entity(cell, degree):
    fe = LagrangeElement(cell, degree)
    for d in range(cell.dim + 1):
        node_count = comb(degree - 1, d)
        for e in fe.entity_nodes[d].values():
            assert len(e) == node_count
test_vandermonde_matrix_grad.py (project: finite-element-course, author: finite-element)
def test_vandermonde_matrix_grad_shape(cell, degree):
    points = np.ones((1, cell.dim))
    shape = vandermonde_matrix(cell, degree, points, grad=True).shape
    correct_shape = (1, round(comb(degree + cell.dim, cell.dim)), cell.dim)
    assert shape == correct_shape, \
        "vandermonde matrix should have returned an array of shape %s, not %s" \
        % (correct_shape, shape)
test_vandermonde_matrix.py (project: finite-element-course, author: finite-element)
def test_vandermonde_matrix_size(cell, degree):
    points = np.ones((1, cell.dim))
    shape = vandermonde_matrix(cell, degree, points).shape
    correct_shape = (1, np.round(comb(degree + cell.dim, cell.dim)))
    assert shape == correct_shape, \
        "vandermonde matrix should have returned an array of shape %s, not %s" \
        % (correct_shape, shape)
def bernstein_polynomials(i, n, t):
    return comb(n, i) * (t ** (n - i)) * ((1 - t) ** i)


# Get points for bezier curve
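Note that the snippet above indexes the Bernstein basis in reverse, B_{n-i,n}(t) = C(n, i) t^{n-i} (1 - t)^i, which is equivalent to the usual convention as long as the control points are traversed consistently. A hedged sketch of assembling the basis into a Bézier curve (assuming `comb` is `scipy.special.comb`; `bezier_curve` is an illustrative helper, not from the original project):

import numpy as np
from scipy.special import comb

def bezier_curve(control_points, num=50):
    # Weight the control points by the Bernstein basis defined above.
    control_points = np.asarray(control_points, dtype=float)
    n = len(control_points) - 1
    t = np.linspace(0.0, 1.0, num)
    basis = np.array([bernstein_polynomials(i, n, t) for i in range(n + 1)])
    return basis.T @ control_points  # shape (num, point_dim)

curve = bezier_curve([[0, 0], [1, 2], [3, 3], [4, 0]])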
def easyline(n):
    # central binomial coefficient C(2n, n)
    return comb(2 * n, n, exact=True)
def RandIndex(labels_true, labels_pred):
    n_samples = len(labels_true)
    contingency = contingency_matrix(labels_true, labels_pred, sparse=True)
    a = sum(comb2(n_ij) for n_ij in contingency.data)
    b = sum(comb2(n_c) for n_c in np.ravel(contingency.sum(axis=0))) - a
    c = sum(comb2(n_c) for n_c in np.ravel(contingency.sum(axis=1))) - a
    d = comb(n_samples, 2) - a - b - c
    return (a + d) / comb(n_samples, 2)
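A small usage sketch (assuming `contingency_matrix` is `sklearn.metrics.cluster.contingency_matrix` and `comb` is `scipy.special.comb`, as the calls suggest, with `comb2` defined as below):

import numpy as np
from scipy.special import comb
from sklearn.metrics.cluster import contingency_matrix

labels_true = [0, 0, 1, 1, 2, 2]
labels_pred = [0, 0, 1, 2, 2, 2]
# Fraction of sample pairs on which the two clusterings agree: 12/15 = 0.8.
print(RandIndex(labels_true, labels_pred))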
def comb2(n):
    # the exact version is faster for k == 2: use it by default globally in
    # this module instead of the float approximate variant
    return comb(n, 2, exact=1)
def check_sample_int_distribution(sample_without_replacement):
    # This test is heavily inspired from test_random.py of python-core.
    #
    # For the entire allowable range of 0 <= k <= N, validate that
    # sample generates all possible permutations.
    n_population = 10

    # a large number of trials prevents false negatives without slowing the
    # normal case
    n_trials = 10000

    for n_samples in range(n_population):
        # Counting the number of combinations is not as good as counting the
        # number of permutations. However, it works with a sampling algorithm
        # that does not provide a random permutation of the subset of integers.
        n_expected = combinations(n_population, n_samples, exact=True)

        output = {}
        for i in range(n_trials):
            output[frozenset(sample_without_replacement(n_population,
                                                        n_samples))] = None
            if len(output) == n_expected:
                break
        else:
            raise AssertionError(
                "number of combinations != number of expected (%s != %s)" %
                (len(output), n_expected))
def get_expected_subsample_variety(dictionary, subsample_size):
    """Compute the expected variety of a subsample of given size drawn from a
    given frequency dictionary.
    """
    sample_size = sum(dictionary.values())
    if subsample_size > sample_size:
        raise ValueError(u'Not enough elements in dictionary')
    num_subsamples = binom(sample_size, subsample_size, exact=True)
    expected_variety = len(dictionary)
    for freq in dictionary.values():
        expected_variety -= _prob_no_occurrence(
            sample_size, subsample_size, freq, num_subsamples
        )
    return expected_variety
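A brief usage sketch (assuming `binom` is bound to `scipy.special.comb`, as its `exact=True` call suggests, and `_prob_no_occurrence` from above is in scope):

from scipy.special import comb as binom

# Three types with frequencies 3, 2 and 1 (6 tokens in total); expected number
# of distinct types observed in a random subsample of 2 tokens.
freqs = {'a': 3, 'b': 2, 'c': 1}
print(get_expected_subsample_variety(freqs, 2))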
def gevrey_tanh(T, n, sigma=sigma_tanh, K=K_tanh):
    """
    Provide the flat output y(t) = phi(t), with Gevrey order
    1 + 1/sigma, and its derivatives up to order n.
    :param T: end point of the time interval [0, T] (float)
    :param n: highest derivative order (integer)
    :param sigma: (float)
    :param K: (float)
    :return: np.array([[phi], ... , [phi^(n)]])
    """
    t_init = t = np.linspace(0., T, int(0.5 * 10 ** (2 + np.log10(T))))

    # pop
    t = np.delete(t, 0, 0)
    t = np.delete(t, -1, 0)

    # main
    tau = t / T

    a = dict()
    a[0] = K * (4 * tau * (1 - tau)) ** (1 - sigma) / (2 * (sigma - 1))
    a[1] = (2 * tau - 1) * (sigma - 1) / (tau * (1 - tau)) * a[0]
    for k in range(2, n + 2):
        a[k] = (tau * (1 - tau)) ** -1 * ((sigma - 2 + k) * (2 * tau - 1) * a[k - 1] + (k - 1) * (2 * sigma - 4 + k) * a[k - 2])

    yy = dict()
    yy[0] = np.tanh(a[1])
    if n > 0:
        yy[1] = a[2] * (1 - yy[0] ** 2)
    z = dict()
    z[0] = (1 - yy[0] ** 2)
    for i in range(2, n + 1):
        sum_yy = np.zeros(len(t))
        for k in range(i):
            if k == 0:
                sum_z = np.zeros(len(t))
                for j in range(i):
                    sum_z += -sm.comb(i - 1, j) * yy[j] * yy[i - 1 - j]
                z[i - 1] = sum_z
            sum_yy += sm.comb(i - 1, k) * a[k + 2] * z[i - 1 - k]
        yy[i] = sum_yy

    # push
    phi = np.nan * np.zeros((n + 1, len(t) + 2))
    for i in range(n + 1):
        phi_temp = 0.5 * yy[i]
        if i == 0:
            phi_temp += 0.5
            phi_temp = np.insert(phi_temp, 0, [0.], axis=0)
            phi[i, :] = np.append(phi_temp, [1.])
        else:
            phi_temp = np.insert(phi_temp, 0, [0.], axis=0)
            # attention: divide by T^i
            phi[i, :] = np.append(phi_temp, [0.]) / T ** i

    return phi, t_init
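A minimal call sketch (assuming `np` is NumPy, `sm` is a module providing `comb`, e.g. `scipy.special`, and passing `sigma` and `K` explicitly so the module-level defaults `sigma_tanh`, `K_tanh` are not needed; the parameter values are illustrative only):

import numpy as np
from scipy import special as sm  # provides comb() used inside gevrey_tanh

phi, t = gevrey_tanh(T=1.0, n=2, sigma=1.1, K=2.0)
print(phi.shape, t.shape)  # (n+1, len(t)) and (len(t),)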
def _power_series_flat_out(z, t, n, param, y, bound_cond_type):
    """ Provide the power series approximation for x(z,t) and x'(z,t).
    :param z: [0, ..., l] (numpy array)
    :param t: [0, ... , t_end] (numpy array)
    :param n: series termination index (integer)
    :param param: [a2, a1, a0, alpha, beta] (list)
    :param y: flat output with derivatives np.array([[y], ..., [y^(n/2)]])
    :param bound_cond_type: boundary condition type at z=0, 'robin' or 'dirichlet' (string)
    :return: field variable x(z,t) and spatial derivative x'(z,t)
    """
    # TODO: documentation
    a2, a1, a0, alpha, beta = param
    shape = (len(t), len(z))
    x = np.nan * np.ones(shape)
    d_x = np.nan * np.ones(shape)

    # Actually power_series() is designed for the robin boundary condition at z=0.
    # With the following modification it can also be used for the dirichlet boundary condition at z=0.
    if bound_cond_type == 'robin':
        is_robin = 1.
    elif bound_cond_type == 'dirichlet':
        alpha = 1.
        is_robin = 0.
    else:
        raise ValueError("Selected boundary condition {0} not supported! Use 'robin' or 'dirichlet'".format(
            bound_cond_type))

    # TODO: flip iteration order: z <--> t, result: one or two calls instead of len(t) calls
    for i in range(len(t)):
        sum_x = np.zeros(len(z))
        for j in range(n):
            sum_b = np.zeros(len(z))
            for k in range(j + 1):
                sum_b += sm.comb(j, k) * (-a0) ** (j - k) * y[k, i]
            sum_x += (is_robin + alpha * z / (2. * j + 1.)) * z ** (2 * j) / sm.factorial(2 * j) / a2 ** j * sum_b
        x[i, :] = sum_x

    for i in range(len(t)):
        sum_x = np.zeros(len(z))
        for j in range(n):
            sum_b = np.zeros(len(z))
            for k in range(j + 2):
                sum_b += sm.comb(j + 1, k) * (-a0) ** (j - k + 1) * y[k, i]
            if j == 0:
                sum_x += alpha * y[0, i]
            sum_x += (is_robin + alpha * z / (2. * (j + 1))) * z ** (2 * j + 1) / sm.factorial(2 * j + 1) / a2 ** (j + 1) * sum_b
        d_x[i, :] = sum_x

    return x, d_x
def test_cross_validator_with_default_params():
    n_samples = 4
    n_unique_labels = 4
    n_folds = 2
    p = 2
    n_iter = 10  # (the default value)

    X = np.array([[1, 2], [3, 4], [5, 6], [7, 8]])
    X_1d = np.array([1, 2, 3, 4])
    y = np.array([1, 1, 2, 2])
    labels = np.array([1, 2, 3, 4])
    loo = LeaveOneOut()
    lpo = LeavePOut(p)
    kf = KFold(n_folds)
    skf = StratifiedKFold(n_folds)
    lolo = LeaveOneLabelOut()
    lopo = LeavePLabelOut(p)
    ss = ShuffleSplit(random_state=0)
    ps = PredefinedSplit([1, 1, 2, 2])  # n_splits = number of unique folds = 2

    loo_repr = "LeaveOneOut()"
    lpo_repr = "LeavePOut(p=2)"
    kf_repr = "KFold(n_folds=2, random_state=None, shuffle=False)"
    skf_repr = "StratifiedKFold(n_folds=2, random_state=None, shuffle=False)"
    lolo_repr = "LeaveOneLabelOut()"
    lopo_repr = "LeavePLabelOut(n_labels=2)"
    ss_repr = ("ShuffleSplit(n_iter=10, random_state=0, test_size=0.1, "
               "train_size=None)")
    ps_repr = "PredefinedSplit(test_fold=array([1, 1, 2, 2]))"

    n_splits = [n_samples, comb(n_samples, p), n_folds, n_folds,
                n_unique_labels, comb(n_unique_labels, p), n_iter, 2]

    for i, (cv, cv_repr) in enumerate(zip(
            [loo, lpo, kf, skf, lolo, lopo, ss, ps],
            [loo_repr, lpo_repr, kf_repr, skf_repr, lolo_repr, lopo_repr,
             ss_repr, ps_repr])):
        # Test if get_n_splits works correctly
        assert_equal(n_splits[i], cv.get_n_splits(X, y, labels))

        # Test if the cross-validator works as expected even if
        # the data is 1d
        np.testing.assert_equal(list(cv.split(X, y, labels)),
                                list(cv.split(X_1d, y, labels)))

        # Test that train, test indices returned are integers
        for train, test in cv.split(X, y, labels):
            assert_equal(np.asarray(train).dtype.kind, 'i')
            assert_equal(np.asarray(test).dtype.kind, 'i')

        # Test if the repr works without any errors
        assert_equal(cv_repr, repr(cv))