def compute_error(y, m, v, lik, median=False, no_samples=50):
if lik == 'gauss':
y = y.reshape((y.shape[0],))
if median:
rmse = np.sqrt(np.median((y - m)**2))
else:
rmse = np.sqrt(np.mean((y - m)**2))
return rmse
elif lik == 'cdf':
K = no_samples
fs = draw_randn_samples(K, m, v).T
log_factor = stats.norm.logcdf(np.tile(y.reshape((y.shape[0], 1)), (1, K)) * fs)
ll = logsumexp(log_factor - np.log(K), 1)
return 1 - np.mean(ll > np.log(0.5))
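# Example usage (a sketch): the 'gauss' branch reduces to an RMSE between the
# targets y and the predictive means m; v and no_samples are only used by the
# 'cdf' branch. The values below are illustrative.
import numpy as np

y_demo = np.array([[0.0], [1.0], [2.0]])
m_demo = np.array([0.1, 0.9, 2.2])
print(compute_error(y_demo, m_demo, v=None, lik='gauss'))  # ~0.141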
def log_shannon_entropy(log_p):
"""Calculates shannon entropy in bits.
Parameters
----------
p : np.array
array of probabilities
Returns
-------
shannon entropy in bits
"""
out = -logsumexp(log_p + np.log(log_p))
return out
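# Sanity check for the log-entropy reading above (a sketch, assuming
# scipy.special.logsumexp): a uniform distribution over four outcomes has
# entropy log(4), so the function should return log(log(4)).
import numpy as np

log_p_demo = np.log(np.full(4, 0.25))
print(np.isclose(log_shannon_entropy(log_p_demo), np.log(np.log(4))))  # True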
def __init__(self, values, log_weights=None):
self.length = len(values)
if log_weights is None:
log_weights = np.full(len(values), -math.log(len(values))) # assume uniform
else:
log_weights = np.array(log_weights)
weights = np.exp(log_weights - logsumexp(log_weights))
self.distribution = collections.defaultdict(float)
for i in range(self.length):
self.distribution[values[i]] += weights[i]
self.distribution = collections.OrderedDict(sorted(self.distribution.items()))
self.values = np.array(list(self.distribution.keys()))
self.weights = np.array(list(self.distribution.values()))
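# The normalization step above is a softmax over the log weights; a minimal
# standalone sketch (assuming scipy.special.logsumexp):
import numpy as np
from scipy.special import logsumexp

log_w = np.array([-1.0, -2.0, -0.5])
w = np.exp(log_w - logsumexp(log_w))
print(w.sum())  # 1.0 up to floating-point error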
def predict_proba(self, docs):
# stack per-model scores as columns (np.hstack needs a sequence, not a generator)
scores = np.hstack([cur_model.score(docs).reshape(-1, 1) for cur_model in self.models])
if self.class_priors:
scores += self.class_scores
return np.exp(scores - logsumexp(scores, axis=1, keepdims=True))
def _compute_log_likelihood(self, X):
n_samples, _ = X.shape
res = np.zeros((n_samples, self.n_components))
for i in range(self.n_components):
log_denses = self._compute_log_weighted_gaussian_densities(X, i)
res[:, i] = logsumexp(log_denses, axis=1)
return res
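# The same pattern (per-state log-likelihood as a logsumexp over weighted
# mixture-component densities) in a standalone sketch; the weights, means and
# standard deviations below are illustrative, not from the original model.
import numpy as np
from scipy.special import logsumexp
from scipy.stats import norm

x = np.array([0.3, -1.2])                       # two 1-D samples
log_weights = np.log([0.7, 0.3])                # mixture weights
means, stds = np.array([0.0, 2.0]), np.array([1.0, 0.5])
log_dens = norm.logpdf(x[:, None], means, stds) + log_weights  # (n_samples, n_components)
print(logsumexp(log_dens, axis=1))              # log p(x) for each sample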
def _do_forward_pass(self, framelogprob):
n_samples, n_components = framelogprob.shape
fwdlattice = np.zeros((n_samples, n_components))
_hmmc._forward(n_samples, n_components,
log_mask_zero(self.startprob_),
log_mask_zero(self.transmat_),
framelogprob, fwdlattice)
return logsumexp(fwdlattice[-1]), fwdlattice
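# A pure-Python sketch of the log-space forward recursion (illustrative; the
# snippet above delegates the same computation to hmmlearn's compiled
# `_hmmc._forward`, which fills `fwdlattice` in place):
import numpy as np
from scipy.special import logsumexp

def forward_log(log_startprob, log_transmat, framelogprob):
    n_samples, n_components = framelogprob.shape
    fwd = np.empty((n_samples, n_components))
    fwd[0] = log_startprob + framelogprob[0]
    for t in range(1, n_samples):
        for j in range(n_components):
            fwd[t, j] = logsumexp(fwd[t - 1] + log_transmat[:, j]) + framelogprob[t, j]
    return logsumexp(fwd[-1]), fwd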
def _W_nj(self):
"""The sample sorted sample weights in all states.
FOR INTERNAL USE ONLY!
"""
m = self.nstates_sampled
logQ_nj = self._f[newaxis, :] - self._u_nj
logNorm_n = logsumexp(logQ_nj[:, :m], 1, self.PIsdiag[newaxis, :])
_W_nj = exp(logQ_nj - logNorm_n[:, newaxis])
return _W_nj
def compute_unsampled_weights(self, u_jn):
"""Compute the sample weights for unsampled states. This requires the
observed reduced potential energies on the complete sample set.
NB Use of this function can be obviated by simply including the
unsampled states in the initial calculation.
Arguments
---------
u_jn : array-like
2d array-like of reduced potential energies. The dimensions must
be L x N, where L is the number of states to compute free energies
for and N is the total sample size.
Returns
-------
W_nj_k : ndarray
Sample weights for the N samples in each of L states
"""
u_nj_k = asarray(u_jn).T
# NB ESTIMATING ERRORS HERE WILL CAUSE AN INFINITE LOOP!
f_k, varf_k = self.compute_unsampled_free_energies(u_jn, False)
logQ_nj_k = f_k[newaxis, :] - u_nj_k
m = self.nstates_sampled
logQ_nj = self._f[newaxis, :] - self._u_nj
logNorm_n = logsumexp(logQ_nj[:, :m], 1, self.PIsdiag[newaxis, :])
return exp(logQ_nj_k - logNorm_n[:, newaxis])
def gibbs_update_mask(self, i_iter):
mask_old = copy.deepcopy(self.mask)
assert self.common_component.K==1, "common component can only have one cluster component"
for i_dim in range(self.D):
mask_new = copy.deepcopy(self.mask)
# Compute log probability of `mask[i]`
log_prob_mask = np.zeros(2, dtype=float)
mask_new[i_dim] = 0
log_prob_mask[0] = self.log_marg_mask(mask_new)
mask_new[i_dim] = 1
log_prob_mask[1] = self.log_marg_mask(mask_new)
prob_mask = np.exp(log_prob_mask - logsumexp(log_prob_mask))
# Sample the new component assignment for `mask[i]`
k = utils.draw(prob_mask)
self.mask[i_dim] = k
self.p_bern = np.random.beta(self.bern_prior.a + np.sum(self.mask),
self.bern_prior.b + self.D - np.sum(self.mask), 1)[0]
self.make_robust_p_bern()
if i_iter % 20 == 0:
logging.info('p_bern_new: {}'.format(self.p_bern))
logging.info('mask old: {}'.format(mask_old))
logging.info('mask new: {}'.format(self.mask))
def logpdf(self, rowid, targets, constraints=None, inputs=None):
assert targets.keys() == self.outputs
assert inputs.keys() == self.inputs
assert not constraints
x = inputs[self.inputs[0]]
y = targets[self.outputs[0]]
return logsumexp([
np.log(.5)+self.uniform.logpdf(y-x**2),
np.log(.5)+self.uniform.logpdf(-y-x**2)
])
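# logsumexp over a plain Python list, as used above, is a convenient way to
# combine equally weighted mixture components; a standalone sketch with two
# Gaussian components (illustrative values):
import numpy as np
from scipy.special import logsumexp
from scipy.stats import norm

y_demo = 0.4
print(logsumexp([np.log(.5) + norm.logpdf(y_demo, loc=-1.0),
                 np.log(.5) + norm.logpdf(y_demo, loc=1.0)]))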
def _geq(self, X):
numerator = self._log_proba(X)
denominator = logsumexp(numerator, axis=1)
denominator = denominator.reshape(len(denominator), 1)
return numerator - denominator
def _less(self, X):
numerator = self._log_proba(X) + np.log(len(self.classes_) - 1)
denominator = logsumexp(numerator, axis=1)
denominator = denominator.reshape(len(denominator), 1)
return numerator - denominator
def outActivate(aValue, logScale=False):
"""
Output activation function (softmax); returns an array with the same shape as aValue.
"""
if logScale:
return aValue - misc.logsumexp(aValue)
else:
return np.exp(aValue) / np.sum(np.exp(aValue), axis=0)
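# Example usage (a sketch, assuming the snippet's `misc` is scipy.misc from an
# older SciPy): the log-scale output is the log of the softmax, so
# exponentiating it recovers the plain softmax output.
import numpy as np

a_demo = np.array([1.0, 2.0, 0.5])
print(np.allclose(np.exp(outActivate(a_demo, logScale=True)), outActivate(a_demo)))  # True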
## node a: pre-activation
def calc_prob_same_diff(self, data_pairs, return_log=True, norm_probs=True,
return_prob='same'):
assert data_pairs.shape[-2] == 2
assert isinstance(return_log, bool)
assert return_prob == 'same' or return_prob == 'diff'
log_ps_diff = self.calc_probs_diff(data_pairs)
log_ps_same = self.calc_probs_same(data_pairs)
assert log_ps_diff.shape[-2] == log_ps_diff.shape[-1]
assert log_ps_diff.shape[-1] == log_ps_same.shape[-1]
log_prob_diff = logsumexp(log_ps_diff, axis=(-1, -2))
log_prob_same = logsumexp(log_ps_same, axis=-1) + np.log(6)
# There are 42 "different" probabilities and 7 "same" ones, so multiplying
# the "same" term by six makes the prior 50/50 on the same/diff task.
if return_prob == 'same':
log_probs = log_prob_same
else:
log_probs = log_prob_diff
if norm_probs is True:
norms = logsumexp([log_prob_diff, log_prob_same], axis=0)
log_probs = log_probs - norms
if return_log is True:
return log_probs
else:
return np.exp(log_probs)
def held_out_log_predicitive(clustering, dist, partition_prior, test_data, train_data, per_point=False):
clustering = relabel_clustering(clustering)
block_params = []
log_cluster_prior = []
block_ids = sorted(np.unique(clustering))
for z in block_ids:
params = dist.create_params_from_data(train_data[clustering == z])
block_params.append(params)
log_cluster_prior.append(partition_prior.log_tau_2_diff(params.N))
num_blocks = len(block_ids)
block_params.append(dist.create_params())
log_cluster_prior.append(partition_prior.log_tau_1_diff(num_blocks))
log_cluster_prior = np.array(log_cluster_prior)
log_cluster_prior = log_normalize(log_cluster_prior)
log_p = np.zeros((test_data.shape[0], len(log_cluster_prior)))
for z, (w, params) in enumerate(zip(log_cluster_prior, block_params)):
log_p[:, z] = w + dist.log_predictive_likelihood_bulk(test_data, params)
if per_point:
return log_sum_exp(log_p, axis=1)
else:
return np.sum(log_sum_exp(log_p, axis=1))
def _get_eos_prob(self):
"""Get loglikelihood according cur_p, cur_r, and n_consumed """
eos_point_prob = self._get_eos_point_prob(max(
1,
self.n_consumed - self.offset))
if self.use_point_probs:
return eos_point_prob - self.max_eos_prob
if not self.prev_eos_probs:
self.prev_eos_probs.append(eos_point_prob)
return eos_point_prob
# bypass utils.log_sum because we always want to use logsumexp here
prev_sum = logsumexp(np.asarray([p for p in self.prev_eos_probs]))
self.prev_eos_probs.append(eos_point_prob)
# Desired prob is eos_point_prob / (1 - sum of previous EOS probs)
return eos_point_prob - np.log(1.0-np.exp(prev_sum))
def predict_next(self):
"""Looks up ngram scores via self.scores. """
cur_hist_length = len(self.history)
this_scores = [[] for _ in xrange(cur_hist_length+1)]
this_unk_scores = [[] for _ in xrange(cur_hist_length+1)]
for pos in xrange(len(self.scores)):
this_scores[0].append(self.scores[pos])
this_unk_scores[0].append(self.unk_scores[pos])
acc = 0.0
for order, word in enumerate(self.history):
if pos + order + 1 >= len(self.scores):
break
acc += utils.common_get(
self.scores[pos + order], word,
self.unk_scores[pos + order])
this_scores[order+1].append(acc + self.scores[pos + order + 1])
this_unk_scores[order+1].append(
acc + self.unk_scores[pos + order + 1])
combined_scores = []
combined_unk_scores = []
for order, (scores, unk_scores) in enumerate(zip(this_scores,
this_unk_scores)):
if scores and order + 1 >= self.min_order:
score_matrix = np.vstack(scores)
combined_scores.append(logsumexp(score_matrix, axis=0))
combined_unk_scores.append(utils.log_sum(unk_scores))
if not combined_scores:
self.cur_unk_score = 0.0
return {}
self.cur_unk_score = sum(combined_unk_scores)
return sum(combined_scores)
def log_sum_log_semiring(vals):
"""Uses the ``logsumexp`` function in scipy to calculate the log of
the sum of a set of log values.
Args:
vals (iterable): List or set of log-domain values
"""
return logsumexp(numpy.asarray([val for val in vals]))
#log_sum = log_sum_log_semiring
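# Example usage (a sketch): combining log(0.2) and log(0.3) gives log(0.5).
import numpy
print(numpy.isclose(log_sum_log_semiring({numpy.log(0.2), numpy.log(0.3)}), numpy.log(0.5)))  # True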
def log_normalize(log_vec, axis=0):
axes = [slice(None)] * len(log_vec.shape)
axes[axis] = np.newaxis
# index with a tuple: NumPy no longer accepts a list of slices as an index
return log_vec - logsumexp(log_vec, axis=axis)[tuple(axes)]
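# Example usage (a sketch, assuming scipy.special.logsumexp): normalizing a
# matrix of log scores along axis 1 so each row exponentiates to a probability
# vector.
import numpy as np

log_scores = np.log(np.array([[1.0, 3.0], [2.0, 2.0]]))
print(np.exp(log_normalize(log_scores, axis=1)))  # rows [0.25, 0.75] and [0.5, 0.5]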
def cat_error(preds, targets, num_outputs):
# NOTE: the preds matrix holds log-likelihoods, not probabilities;
# log-normalize and exponentiate to recover class probabilities,
# then use the Brier score as the error estimate.
preds = preds - logsumexp(preds, axis=1, keepdims=True)
pred_probs = np.exp(preds)
return np.mean(np.linalg.norm(pred_probs - targets, axis=1))
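# Example usage (a sketch, assuming scipy.special.logsumexp): two samples,
# three classes, one-hot targets; the score is the mean Euclidean norm of
# (predicted probabilities - targets).
import numpy as np

preds_demo = np.log(np.array([[0.7, 0.2, 0.1], [0.1, 0.8, 0.1]]))
targets_demo = np.array([[1.0, 0.0, 0.0], [0.0, 1.0, 0.0]])
print(cat_error(preds_demo, targets_demo, num_outputs=3))  # ~0.31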