def get_acceptance_rate(q, p, new_q, new_p, log_posterior, mass, data_axes):
    old_hamiltonian, old_log_prob = hamiltonian(
        q, p, log_posterior, mass, data_axes)
    new_hamiltonian, new_log_prob = hamiltonian(
        new_q, new_p, log_posterior, mass, data_axes)
    old_log_prob = tf.check_numerics(
        old_log_prob,
        'HMC: old_log_prob has numeric errors! Try better initialization.')
    # Metropolis acceptance probability: min(1, exp(H_old - H_new)).
    acceptance_rate = tf.exp(
        tf.minimum(-new_hamiltonian + old_hamiltonian, 0.0))
    # Reject (acceptance rate 0) any proposal whose Hamiltonian or log-prob
    # is NaN/Inf instead of propagating the bad values.
    is_finite = tf.logical_and(tf.is_finite(acceptance_rate),
                               tf.is_finite(new_log_prob))
    acceptance_rate = tf.where(is_finite, acceptance_rate,
                               tf.zeros_like(acceptance_rate))
    return old_hamiltonian, new_hamiltonian, old_log_prob, new_log_prob, \
        acceptance_rate
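# --- Illustrative aside, not part of the library code above ---
# tf.check_numerics(tensor, message) is an identity op that raises an
# InvalidArgumentError at run time if the tensor contains NaN or Inf,
# prefixing the error with `message`. A minimal, self-contained sketch,
# assuming TensorFlow 1.x graph mode:
import tensorflow as tf

x = tf.placeholder(tf.float32, shape=[None])
safe_log_x = tf.check_numerics(tf.log(x), "log(x) produced NaN or Inf")

with tf.Session() as sess:
    print(sess.run(safe_log_x, feed_dict={x: [1.0, 2.0]}))  # [0. 0.6931472]
    # sess.run(safe_log_x, feed_dict={x: [0.0]})  # raises: log(0) is -inf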
def __init__(self,
             rate,
             dtype=None,
             group_ndims=0,
             check_numerics=False,
             **kwargs):
    self._rate = tf.convert_to_tensor(rate)
    param_dtype = assert_same_float_dtype(
        [(self._rate, 'Poisson.rate')])

    if dtype is None:
        dtype = tf.int32
    assert_same_float_and_int_dtype([], dtype)

    self._check_numerics = check_numerics
    super(Poisson, self).__init__(
        dtype=dtype,
        param_dtype=param_dtype,
        is_continuous=False,
        is_reparameterized=False,
        group_ndims=group_ndims,
        **kwargs)
def _log_prob(self, given):
    logits = self.logits
    n = tf.cast(self.n_experiments, self.param_dtype)
    given = tf.cast(given, self.param_dtype)
    log_1_minus_p = -tf.nn.softplus(logits)
    lgamma_n_plus_1 = tf.lgamma(n + 1)
    lgamma_given_plus_1 = tf.lgamma(given + 1)
    lgamma_n_minus_given_plus_1 = tf.lgamma(n - given + 1)
    if self._check_numerics:
        lgamma_given_plus_1 = tf.check_numerics(
            lgamma_given_plus_1, "lgamma(given + 1)")
        lgamma_n_minus_given_plus_1 = tf.check_numerics(
            lgamma_n_minus_given_plus_1, "lgamma(n - given + 1)")
    return lgamma_n_plus_1 - lgamma_n_minus_given_plus_1 - \
        lgamma_given_plus_1 + given * logits + n * log_1_minus_p
def _log_prob(self, given):
    temperature, logits = self.path_param(self.temperature), \
                          self.path_param(self.logits)
    log_given = tf.log(given)
    log_1_minus_given = tf.log(1 - given)
    log_temperature = tf.log(temperature)

    if self._check_numerics:
        log_given = tf.check_numerics(log_given, "log(given)")
        log_1_minus_given = tf.check_numerics(
            log_1_minus_given, "log(1 - given)")
        log_temperature = tf.check_numerics(
            log_temperature, "log(temperature)")

    logistic_given = log_given - log_1_minus_given
    temp = temperature * logistic_given - logits

    return log_temperature - log_given - log_1_minus_given + \
        temp - 2 * tf.nn.softplus(temp)
def build_graph(self, weights, loss=None, optimizer=None, norm=False,
                batch_size=None, grad_ys=None):
    if loss is not None:
        gradients = tf.gradients(loss.node,
                                 list(utils.Utils.flatten(weights.node)), grad_ys)
        gradients = [tf.check_numerics(g, 'gradient_%d' % i)
                     for i, g in enumerate(gradients)]
        if batch_size is not None:
            gradients = [g / float(batch_size) for g in gradients]

        # store gradients global norm before clipping
        self.global_norm = tf.global_norm(gradients)

        # clip gradients after global norm has been stored
        if norm:
            gradients, _ = tf.clip_by_global_norm(gradients, norm)

        self.calculate = graph.TfNode(utils.Utils.reconstruct(gradients, weights.node))
    if optimizer is not None:
        self.ph_gradients = graph.Placeholders(weights)
        self.apply = graph.TfNode(optimizer.node.apply_gradients(
            utils.Utils.izip(self.ph_gradients.checked, weights.node)))
def build_graph(self, dtype, shape=None, name=None):
    """Assemble one placeholder.

    Args:
        dtype: The type of elements in the placeholder to be fed.
        shape: The shape of the tensor to be fed (optional). If the shape is
            not specified, you can feed a tensor of any shape.
        name: A name for the placeholder (optional).

    Returns:
        A placeholder of the given shape and data type.
    """
    ph = tf.placeholder(self.DTYPE[dtype], shape=shape, name=name)
    if dtype not in [np.int32, np.int64]:
        # Only float placeholders get a NaN/Inf guard; integer dtypes are not
        # supported by tf.check_numerics.
        self.checked = tf.check_numerics(ph, '')
    return ph
def __init__(self, length_scale=1.0, magnitude=1.0, check_numerics=True,
             debug=False):
    assert np.isscalar(length_scale)
    assert np.isscalar(magnitude)
    assert length_scale > 0 and magnitude > 0
    self.length_scale = length_scale
    self.magnitude = magnitude
    self.check_numerics = check_numerics
    self.debug = debug
    self.X_train = None
    self.y_train = None
    self.xy_ = None
    self.K = None
    self.graph = None
    self.vars = None
    self.ops = None
def fully_connected(in_tensor, layer_name, out_chan, trainable=True):
    with tf.variable_scope(layer_name):
        in_size = in_tensor.get_shape().as_list()
        assert len(in_size) == 2, 'Input to a fully connected layer must be a vector.'
        weights_shape = [in_size[1], out_chan]

        # weight matrix
        weights = tf.get_variable('weights', weights_shape, tf.float32,
                                  tf.contrib.layers.xavier_initializer(),
                                  trainable=trainable)
        weights = tf.check_numerics(weights, 'weights: %s' % layer_name)

        # bias
        biases = tf.get_variable('biases', [out_chan], tf.float32,
                                 tf.constant_initializer(0.0001),
                                 trainable=trainable)
        biases = tf.check_numerics(biases, 'biases: %s' % layer_name)

        out_tensor = tf.matmul(in_tensor, weights) + biases
        return out_tensor
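# --- Hypothetical usage sketch for the helper above (TensorFlow 1.x) ---
# The names `inputs`, 'fc1', and 'fc2' are illustrative assumptions, not part of
# the original code. The check_numerics wrappers make each forward pass fail
# fast if the variables are ever corrupted by NaN/Inf.
inputs = tf.placeholder(tf.float32, shape=[None, 64])
hidden = tf.nn.relu(fully_connected(inputs, 'fc1', out_chan=128))
logits = fully_connected(hidden, 'fc2', out_chan=10, trainable=True)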
def v_from_u(u, log_alpha, force_same=True, b=None, v_prime=None):
    u_prime = tf.nn.sigmoid(-log_alpha)
    if not force_same:
        v = b * (u_prime + v_prime * (1 - u_prime)) + (1 - b) * v_prime * u_prime
    else:
        v_1 = (u - u_prime) / safe_clip(1 - u_prime)
        v_1 = tf.clip_by_value(v_1, 0, 1)
        v_1 = tf.stop_gradient(v_1)
        v_1 = v_1 * (1 - u_prime) + u_prime

        v_0 = u / safe_clip(u_prime)
        v_0 = tf.clip_by_value(v_0, 0, 1)
        v_0 = tf.stop_gradient(v_0)
        v_0 = v_0 * u_prime

        v = tf.where(u > u_prime, v_1, v_0)
        v = tf.check_numerics(v, 'v sampling is not numerically stable.')

    if force_same:
        v = v + tf.stop_gradient(-v + u)  # v and u are the same up to numerical errors
    return v
def v_from_u(u, log_alpha, force_same=True):
    # Lovingly copied from https://github.com/tensorflow/models/blob/master/research/rebar/rebar.py
    u_prime = tf.nn.sigmoid(-log_alpha)
    v_1 = (u - u_prime) / safe_clip(1 - u_prime)
    v_1 = tf.clip_by_value(v_1, 0, 1)
    v_1 = tf.stop_gradient(v_1)
    v_1 = v_1 * (1 - u_prime) + u_prime

    v_0 = u / safe_clip(u_prime)
    v_0 = tf.clip_by_value(v_0, 0, 1)
    v_0 = tf.stop_gradient(v_0)
    v_0 = v_0 * u_prime

    v = tf.where(u > u_prime, v_1, v_0)
    v = tf.check_numerics(v, 'v sampling is not numerically stable.')
    if force_same:
        v = v + tf.stop_gradient(-v + u)  # v and u are the same up to numerical errors
    return v
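# --- Note on the undefined helper above ---
# Both v_from_u variants rely on a `safe_clip` helper that is not shown in this
# listing. A plausible definition, consistent with how it is used here (keeping
# denominators away from zero) and with the explicit clipping in the _u_to_v
# variant below, would be:
def safe_clip(x, eps=1e-8):
    # Clip into [eps, 1.0] so dividing by (1 - u_prime) or u_prime is safe.
    return tf.clip_by_value(x, eps, 1.0)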
def _u_to_v(self, log_alpha, u, eps=1e-8):
    """Convert u to tied randomness in v."""
    u_prime = tf.nn.sigmoid(-log_alpha)  # g(u') = 0

    v_1 = (u - u_prime) / tf.clip_by_value(1 - u_prime, eps, 1)
    v_1 = tf.clip_by_value(v_1, 0, 1)
    v_1 = tf.stop_gradient(v_1)
    v_1 = v_1 * (1 - u_prime) + u_prime

    v_0 = u / tf.clip_by_value(u_prime, eps, 1)
    v_0 = tf.clip_by_value(v_0, 0, 1)
    v_0 = tf.stop_gradient(v_0)
    v_0 = v_0 * u_prime

    v = tf.where(u > u_prime, v_1, v_0)
    v = tf.check_numerics(v, 'v sampling is not numerically stable.')
    v = v + tf.stop_gradient(-v + u)  # v and u are the same up to numerical errors
    return v
def qa_rnn(config, is_training, input_seqs, input_masks, xvector=None):
    """Model that takes several input sequences and outputs the encoded vector
    for each of them.

    Args:
        is_training: boolean. Indicates whether the model is used for training.
        input_seqs: 2-D list of tensors, each a [batch_size * embed_size] tensor.
    """
    embed_seqs, atten_seqs, all_output_seqs = \
        sentences_encoding(config, is_training, input_seqs, input_masks, xvector)
    if NUMERIC_CHECK:
        embed_seqs = tf.check_numerics(
            embed_seqs, 'qa_rnn output embedding numeric error')
        atten_seqs = tf.check_numerics(
            atten_seqs, 'qa_rnn output attention numeric error')
        all_output_seqs = tf.check_numerics(
            all_output_seqs, 'qa_rnn output numeric error')
    return embed_seqs, atten_seqs, all_output_seqs
def _log_prob(self, given):
    mean, logstd = self.path_param(self.mean), \
                   self.path_param(self.logstd)
    c = -0.5 * np.log(2 * np.pi)
    precision = tf.exp(-2 * logstd)
    if self._check_numerics:
        precision = tf.check_numerics(precision, "precision")
    return c - logstd - 0.5 * precision * tf.square(given - mean)
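# --- Quick numerical sanity check of the Normal formula above (illustrative only) ---
# With precision = exp(-2 * logstd) = 1 / sigma**2, the return value is the
# standard Gaussian log-density. SciPy is assumed here purely for comparison.
import numpy as np
from scipy import stats

x, mean, logstd = 1.3, 0.5, -0.2
precision = np.exp(-2 * logstd)
manual = -0.5 * np.log(2 * np.pi) - logstd - 0.5 * precision * (x - mean) ** 2
print(manual, stats.norm.logpdf(x, loc=mean, scale=np.exp(logstd)))  # both ~ -1.1963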
def _log_prob(self, given):
    mean, logstd = self.path_param(self.mean), \
                   self.path_param(self.logstd)
    c = -0.5 * (np.log(2.0) + np.log(np.pi))
    precision = tf.exp(-2.0 * logstd)
    if self._check_numerics:
        precision = tf.check_numerics(precision, "precision")
    mask = tf.log(tf.cast(given >= 0., dtype=precision.dtype))
    return (c - (logstd + 0.5 * precision * tf.square(given - mean)) +
            tf.nn.softplus(-2.0 * mean * given * precision)) + mask
def _log_prob(self, given):
    log_p = tf.log(self._prob(given))
    if self._check_numerics:
        log_p = tf.check_numerics(log_p, "log_p")
    return log_p
def _prob(self, given):
    mask = tf.cast(tf.logical_and(tf.less_equal(self.minval, given),
                                  tf.less(given, self.maxval)),
                   self.dtype)
    p = 1. / (self.maxval - self.minval)
    if self._check_numerics:
        p = tf.check_numerics(p, "p")
    return p * mask
def __init__(self,
             alpha,
             beta,
             group_ndims=0,
             check_numerics=False,
             **kwargs):
    self._alpha = tf.convert_to_tensor(alpha)
    self._beta = tf.convert_to_tensor(beta)
    dtype = assert_same_float_dtype(
        [(self._alpha, 'Gamma.alpha'),
         (self._beta, 'Gamma.beta')])

    try:
        tf.broadcast_static_shape(self._alpha.get_shape(),
                                  self._beta.get_shape())
    except ValueError:
        raise ValueError(
            "alpha and beta should be broadcastable to match each "
            "other. ({} vs. {})".format(
                self._alpha.get_shape(), self._beta.get_shape()))

    self._check_numerics = check_numerics
    super(Gamma, self).__init__(
        dtype=dtype,
        param_dtype=dtype,
        is_continuous=True,
        is_reparameterized=False,
        group_ndims=group_ndims,
        **kwargs)
def _log_prob(self, given):
    alpha, beta = self.alpha, self.beta
    log_given = tf.log(given)
    log_beta = tf.log(beta)
    lgamma_alpha = tf.lgamma(alpha)
    if self._check_numerics:
        log_given = tf.check_numerics(log_given, "log(given)")
        log_beta = tf.check_numerics(log_beta, "log(beta)")
        lgamma_alpha = tf.check_numerics(lgamma_alpha, "lgamma(alpha)")
    return alpha * log_beta - lgamma_alpha + (alpha - 1) * log_given - \
        beta * given
def __init__(self,
             alpha,
             beta,
             dtype=None,
             group_ndims=0,
             check_numerics=False,
             **kwargs):
    self._alpha = tf.convert_to_tensor(alpha)
    self._beta = tf.convert_to_tensor(beta)
    dtype = assert_same_float_dtype(
        [(self._alpha, 'Beta.alpha'),
         (self._beta, 'Beta.beta')])

    try:
        tf.broadcast_static_shape(self._alpha.get_shape(),
                                  self._beta.get_shape())
    except ValueError:
        raise ValueError(
            "alpha and beta should be broadcastable to match each "
            "other. ({} vs. {})".format(
                self._alpha.get_shape(), self._beta.get_shape()))

    self._check_numerics = check_numerics
    super(Beta, self).__init__(
        dtype=dtype,
        param_dtype=dtype,
        is_continuous=True,
        is_reparameterized=False,
        group_ndims=group_ndims,
        **kwargs)
def _log_prob(self, given):
    rate = self.rate
    given = tf.cast(given, self.param_dtype)
    log_rate = tf.log(rate)
    lgamma_given_plus_1 = tf.lgamma(given + 1)
    if self._check_numerics:
        log_rate = tf.check_numerics(log_rate, "log(rate)")
        lgamma_given_plus_1 = tf.check_numerics(
            lgamma_given_plus_1, "lgamma(given + 1)")
    return given * log_rate - rate - lgamma_given_plus_1
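# --- Quick numerical sanity check of the Poisson formula above (illustrative only) ---
# The returned expression is the standard Poisson log-pmf
# k * log(rate) - rate - log(k!). SciPy is assumed here purely for comparison.
import math
from scipy import stats

k, rate = 3, 2.5
manual = k * math.log(rate) - rate - math.lgamma(k + 1)
print(manual, stats.poisson.logpmf(k, rate))  # both approximately -1.5429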
def __init__(self,
             alpha,
             beta,
             group_ndims=0,
             check_numerics=False,
             **kwargs):
    self._alpha = tf.convert_to_tensor(alpha)
    self._beta = tf.convert_to_tensor(beta)
    dtype = assert_same_float_dtype(
        [(self._alpha, 'InverseGamma.alpha'),
         (self._beta, 'InverseGamma.beta')])

    try:
        tf.broadcast_static_shape(self._alpha.get_shape(),
                                  self._beta.get_shape())
    except ValueError:
        raise ValueError(
            "alpha and beta should be broadcastable to match each "
            "other. ({} vs. {})".format(
                self._alpha.get_shape(), self._beta.get_shape()))

    self._check_numerics = check_numerics
    super(InverseGamma, self).__init__(
        dtype=dtype,
        param_dtype=dtype,
        is_continuous=True,
        is_reparameterized=False,
        group_ndims=group_ndims,
        **kwargs)
def __init__(self,
             loc,
             scale,
             group_ndims=0,
             is_reparameterized=True,
             use_path_derivative=False,
             check_numerics=False,
             **kwargs):
    self._loc = tf.convert_to_tensor(loc)
    self._scale = tf.convert_to_tensor(scale)
    dtype = assert_same_float_dtype(
        [(self._loc, 'Laplace.loc'),
         (self._scale, 'Laplace.scale')])

    try:
        tf.broadcast_static_shape(self._loc.get_shape(),
                                  self._scale.get_shape())
    except ValueError:
        raise ValueError(
            "loc and scale should be broadcastable to match each "
            "other. ({} vs. {})".format(
                self._loc.get_shape(), self._scale.get_shape()))

    self._check_numerics = check_numerics
    super(Laplace, self).__init__(
        dtype=dtype,
        param_dtype=dtype,
        is_continuous=True,
        is_reparameterized=is_reparameterized,
        use_path_derivative=use_path_derivative,
        group_ndims=group_ndims,
        **kwargs)
def _log_prob(self, given):
    loc, scale = self.path_param(self.loc), \
                 self.path_param(self.scale)
    log_scale = tf.log(scale)
    if self._check_numerics:
        log_scale = tf.check_numerics(log_scale, "log(scale)")
    return -np.log(2.) - log_scale - tf.abs(given - loc) / scale
def __init__(self,
             temperature,
             logits,
             group_ndims=0,
             is_reparameterized=True,
             use_path_derivative=False,
             check_numerics=False,
             **kwargs):
    self._logits = tf.convert_to_tensor(logits)
    self._temperature = tf.convert_to_tensor(temperature)
    param_dtype = assert_same_float_dtype(
        [(self._logits, 'BinConcrete.logits'),
         (self._temperature, 'BinConcrete.temperature')])

    self._temperature = assert_scalar(
        self._temperature, 'BinConcrete.temperature')

    self._check_numerics = check_numerics
    super(BinConcrete, self).__init__(
        dtype=param_dtype,
        param_dtype=param_dtype,
        is_continuous=True,
        is_reparameterized=is_reparameterized,
        use_path_derivative=use_path_derivative,
        group_ndims=group_ndims,
        **kwargs)
def __init__(self,
             alpha,
             group_ndims=0,
             check_numerics=False,
             **kwargs):
    self._alpha = tf.convert_to_tensor(alpha)
    dtype = assert_same_float_dtype(
        [(self._alpha, 'Dirichlet.alpha')])

    static_alpha_shape = self._alpha.get_shape()
    shape_err_msg = "alpha should have rank >= 1."
    cat_err_msg = "n_categories (length of the last axis " \
                  "of alpha) should be at least 2."
    if static_alpha_shape and (static_alpha_shape.ndims < 1):
        raise ValueError(shape_err_msg)
    elif static_alpha_shape and (
            static_alpha_shape[-1].value is not None):
        self._n_categories = static_alpha_shape[-1].value
        if self._n_categories < 2:
            raise ValueError(cat_err_msg)
    else:
        _assert_shape_op = tf.assert_rank_at_least(
            self._alpha, 1, message=shape_err_msg)
        with tf.control_dependencies([_assert_shape_op]):
            self._alpha = tf.identity(self._alpha)
        self._n_categories = tf.shape(self._alpha)[-1]

        _assert_cat_op = tf.assert_greater_equal(
            self._n_categories, 2, message=cat_err_msg)
        with tf.control_dependencies([_assert_cat_op]):
            self._alpha = tf.identity(self._alpha)

    self._check_numerics = check_numerics
    super(Dirichlet, self).__init__(
        dtype=dtype,
        param_dtype=dtype,
        is_continuous=True,
        is_reparameterized=False,
        group_ndims=group_ndims,
        **kwargs)
def _log_prob(self, given):
    given, alpha = maybe_explicit_broadcast(
        given, self.alpha, 'given', 'alpha')
    lbeta_alpha = tf.lbeta(alpha)
    # fix of no static shape inference for tf.lbeta
    if alpha.get_shape():
        lbeta_alpha.set_shape(alpha.get_shape()[:-1])
    log_given = tf.log(given)
    if self._check_numerics:
        lbeta_alpha = tf.check_numerics(lbeta_alpha, "lbeta(alpha)")
        log_given = tf.check_numerics(log_given, "log(given)")
    log_p = -lbeta_alpha + tf.reduce_sum((alpha - 1) * log_given, -1)
    return log_p
def __init__(self,
             temperature,
             logits,
             group_ndims=0,
             is_reparameterized=True,
             use_path_derivative=False,
             check_numerics=False,
             **kwargs):
    self._logits = tf.convert_to_tensor(logits)
    self._temperature = tf.convert_to_tensor(temperature)
    param_dtype = assert_same_float_dtype(
        [(self._logits, 'ExpConcrete.logits'),
         (self._temperature, 'ExpConcrete.temperature')])

    self._logits = assert_rank_at_least_one(
        self._logits, 'ExpConcrete.logits')
    self._n_categories = get_shape_at(self._logits, -1)

    self._temperature = assert_scalar(
        self._temperature, 'ExpConcrete.temperature')

    self._check_numerics = check_numerics
    super(ExpConcrete, self).__init__(
        dtype=param_dtype,
        param_dtype=param_dtype,
        is_continuous=True,
        is_reparameterized=is_reparameterized,
        use_path_derivative=use_path_derivative,
        group_ndims=group_ndims,
        **kwargs)
def _log_prob(self, given):
    logits, temperature = self.path_param(self.logits), \
                          self.path_param(self.temperature)
    n = tf.cast(self.n_categories, self.dtype)
    log_temperature = tf.log(temperature)

    if self._check_numerics:
        log_temperature = tf.check_numerics(
            log_temperature, "log(temperature)")

    temp = logits - temperature * given

    return tf.lgamma(n) + (n - 1) * log_temperature + \
        tf.reduce_sum(temp, axis=-1) - \
        n * tf.reduce_logsumexp(temp, axis=-1)
def __init__(self,
             temperature,
             logits,
             group_ndims=0,
             is_reparameterized=True,
             use_path_derivative=False,
             check_numerics=False,
             **kwargs):
    self._logits = tf.convert_to_tensor(logits)
    self._temperature = tf.convert_to_tensor(temperature)
    param_dtype = assert_same_float_dtype(
        [(self._logits, 'Concrete.logits'),
         (self._temperature, 'Concrete.temperature')])

    self._logits = assert_rank_at_least_one(
        self._logits, 'Concrete.logits')
    self._n_categories = get_shape_at(self._logits, -1)

    self._temperature = assert_scalar(
        self._temperature, 'Concrete.temperature')

    self._check_numerics = check_numerics
    super(Concrete, self).__init__(
        dtype=param_dtype,
        param_dtype=param_dtype,
        is_continuous=True,
        is_reparameterized=is_reparameterized,
        use_path_derivative=use_path_derivative,
        group_ndims=group_ndims,
        **kwargs)
def clip_gradients_by_norm(grads_and_vars, add_to_summary=True):
    if add_to_summary:
        for grad, var in grads_and_vars:
            if grad is not None:
                variable_summaries(grad, 'grad/{}'.format(var.name[:-2]))

    # Clip by norm. Grad can be null when not training some modules.
    with tf.name_scope('clip_gradients_by_norm'):
        grads_and_vars = [
            (
                tf.check_numerics(
                    tf.clip_by_norm(gv[0], 10.),
                    'Invalid gradient'
                ), gv[1]
            )
            if gv[0] is not None else gv
            for gv in grads_and_vars
        ]

    if add_to_summary:
        for grad, var in grads_and_vars:
            if grad is not None:
                variable_summaries(
                    grad, 'clipped_grad/{}'.format(var.name[:-2]))

    return grads_and_vars
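# --- Hypothetical usage sketch for the helper above (TensorFlow 1.x) ---
# `loss` is assumed to be a scalar tensor defined elsewhere; add_to_summary is
# disabled so the variable_summaries helper (not shown here) is not needed.
optimizer = tf.train.AdamOptimizer(learning_rate=1e-3)
grads_and_vars = optimizer.compute_gradients(loss)
grads_and_vars = clip_gradients_by_norm(grads_and_vars, add_to_summary=False)
train_op = optimizer.apply_gradients(grads_and_vars)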