def sequence_iteration(self, output, mask, use_dropout=0, dropout_value=0.5):
    # Softmax over the unit axis of a (time, batch, units) tensor, then mask padded steps.
    dot_product = T.dot(output, self.t_w_out)
    net_o = T.add(dot_product, self.t_b_out)
    ex_net = T.exp(net_o)
    sum_net = T.sum(ex_net, axis=2, keepdims=True)
    softmax_o = ex_net / sum_net
    mask = T.addbroadcast(mask, 2)  # TODO: is this necessary?
    output = T.mul(mask, softmax_o) + T.mul((1. - mask), 1e-6)  # masked steps get a small constant
    return output  # result
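For reference, a minimal NumPy sketch of the same masked softmax; the max subtraction is an extra numerical-stability step added here, not part of the code above.
import numpy as np

def masked_softmax_np(net_o, mask, eps=1e-6):
    # net_o: (time, batch, units), mask: (time, batch, 1)
    e = np.exp(net_o - net_o.max(axis=2, keepdims=True))  # stabilised exponent
    sm = e / e.sum(axis=2, keepdims=True)
    return mask * sm + (1. - mask) * eps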
###### Linear Layer
########################################
def build_vae_loss(input_var, l_z_mu, l_z_ls, l_x_mu_list, l_x_ls_list, l_x_list, l_x,
                   deterministic, binary, L):
    layer_outputs = nn.layers.get_output([l_z_mu, l_z_ls] + l_x_mu_list + l_x_ls_list
                                         + l_x_list + [l_x], deterministic=deterministic)
    z_mu = layer_outputs[0]
    z_ls = layer_outputs[1]
    x_mu = [] if binary else layer_outputs[2:2+L]
    x_ls = [] if binary else layer_outputs[2+L:2+2*L]
    x_list = layer_outputs[2:2+L] if binary else layer_outputs[2+2*L:2+3*L]
    x = layer_outputs[-1]
    # Negative KL(q(z|x) || N(0, I)); adding it to logpxz below yields the ELBO.
    kl_div = 0.5 * T.sum(1 + 2*z_ls - T.sqr(z_mu) - T.exp(2 * z_ls))
    if binary:
        logpxz = sum(nn.objectives.binary_crossentropy(x, input_var).sum()
                     for x in x_list) * (-1./L)
        prediction = x_list[0] if deterministic else x
    else:
        logpxz = sum(log_likelihood(input_var.flatten(2), mu, ls)
                     for mu, ls in zip(x_mu, x_ls)) / L
        prediction = x_mu[0] if deterministic else T.sum(x_mu, axis=0) / L
    loss = -1 * (logpxz + kl_div)  # negative ELBO
    return loss, prediction
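A hedged usage sketch: compiling the VAE loss into a training function, assuming `nn` aliases lasagne and the layers passed in were built elsewhere; the variable names below are illustrative only.
import theano
import theano.tensor as T

input_var = T.matrix('input')
loss, prediction = build_vae_loss(input_var, l_z_mu, l_z_ls, l_x_mu_list, l_x_ls_list,
                                  l_x_list, l_x, deterministic=False, binary=True, L=1)
params = nn.layers.get_all_params(l_x, trainable=True)
updates = nn.updates.adam(loss, params, learning_rate=1e-3)
train_fn = theano.function([input_var], loss, updates=updates)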
def sym_logdensity(self, x):
    """ x is a matrix of column datapoints (VxB) V = n_visible, B = batch size """
    def density_given_previous_a_and_x(x, w, V_alpha, b_alpha, V_mu, b_mu, V_sigma, b_sigma, activations_factor, p_prev, a_prev, x_prev):
        a = a_prev + T.dot(T.shape_padright(x_prev, 1), T.shape_padleft(w, 1))
        h = self.nonlinearity(a * activations_factor)  # BxH
        Alpha = T.nnet.softmax(T.dot(h, V_alpha) + T.shape_padleft(b_alpha))  # BxC
        Mu = T.dot(h, V_mu) + T.shape_padleft(b_mu)  # BxC
        Sigma = T.exp((T.dot(h, V_sigma) + T.shape_padleft(b_sigma)))  # BxC
        p = p_prev + log_sum_exp(-constantX(0.5) * T.sqr((Mu - T.shape_padright(x, 1)) / Sigma) - T.log(Sigma) - constantX(0.5 * np.log(2 * np.pi)) + T.log(Alpha))
        return (p, a, x)
    # First element is different (it is predicted from the bias only)
    a0 = T.zeros_like(T.dot(x.T, self.W))  # BxH
    p0 = T.zeros_like(x[0])
    x0 = T.ones_like(x[0])
    ([ps, _as, _xs], updates) = theano.scan(density_given_previous_a_and_x,
                                            sequences=[x, self.W, self.V_alpha, self.b_alpha, self.V_mu, self.b_mu, self.V_sigma, self.b_sigma, self.activation_rescaling],
                                            outputs_info=[p0, a0, x0])
    return (ps[-1], updates)
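A minimal sketch of compiling the symbolic log-density, assuming `model` is an already-constructed instance exposing sym_logdensity as above.
import theano
import theano.tensor as T

x_sym = T.matrix('x')  # V x B, column datapoints as documented in the docstring
logp, scan_updates = model.sym_logdensity(x_sym)
logdensity_fn = theano.function([x_sym], logp, updates=scan_updates)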
def sample(self, n):
    W = self.W.get_value()
    V_alpha = self.V_alpha.get_value()
    b_alpha = self.b_alpha.get_value()
    V_mu = self.V_mu.get_value()
    b_mu = self.b_mu.get_value()
    V_sigma = self.V_sigma.get_value()
    b_sigma = self.b_sigma.get_value()
    activation_rescaling = self.activation_rescaling.get_value()
    samples = np.zeros((self.n_visible, n))
    for s in xrange(n):
        a = np.zeros((self.n_hidden,))  # H
        for i in xrange(self.n_visible):
            if i == 0:
                a = W[i, :]
            else:
                a = a + W[i, :] * samples[i - 1, s]
            h = self.parameters["nonlinearity"].get_numpy_f()(a * activation_rescaling[i])
            alpha = Utils.nnet.softmax(np.dot(h, V_alpha[i]) + b_alpha[i])  # C
            Mu = np.dot(h, V_mu[i]) + b_mu[i]  # C
            Sigma = np.minimum(np.exp(np.dot(h, V_sigma[i]) + b_sigma[i]), 1)
            comp = Utils.nnet.random_component(alpha)  # draw a mixture component index from alpha
            samples[i, s] = np.random.normal(Mu[comp], Sigma[comp])
    return samples
def sample(self, n):
    W = self.W.get_value()
    V_alpha = self.V_alpha.get_value()
    b_alpha = self.b_alpha.get_value()
    V_mu = self.V_mu.get_value()
    b_mu = self.b_mu.get_value()
    V_sigma = self.V_sigma.get_value()
    b_sigma = self.b_sigma.get_value()
    activation_rescaling = self.activation_rescaling.get_value()
    samples = np.zeros((self.n_visible, n))
    for s in xrange(n):
        a = np.zeros((self.n_hidden,))  # H
        for i in xrange(self.n_visible):
            if i == 0:
                a = W[i, :]
            else:
                a = a + W[i, :] * samples[i - 1, s]
            h = self.parameters["nonlinearity"].get_numpy_f()(a * activation_rescaling[i])
            alpha = Utils.nnet.softmax(np.dot(h, V_alpha[i]) + b_alpha[i])  # C
            Mu = np.dot(h, V_mu[i]) + b_mu[i]  # C
            # Sigma = np.minimum(np.exp(np.dot(h, V_sigma[i]) + b_sigma[i]), 1)
            Sigma = np.exp(np.dot(h, V_sigma[i]) + b_sigma[i])
            comp = Utils.nnet.random_component(alpha)
            samples[i, s] = np.random.laplace(Mu[comp], Sigma[comp])
    return samples
def conditional_logdensities(self, x_lt_i, range):
    raise(Exception("Not implemented"))
    # NOTE: everything below the raise is unreachable; kept as in the original source.
    W = self.W.get_value()
    V_alpha = self.V_alpha.get_value()
    b_alpha = self.b_alpha.get_value()
    V_mu = self.V_mu.get_value()
    b_mu = self.b_mu.get_value()
    V_sigma = self.V_sigma.get_value()
    b_sigma = self.b_sigma.get_value()
    activation_rescaling = self.activation_rescaling.get_value()
    # Calculate
    i = len(x_lt_i)
    a = W[0, :] + np.dot(x_lt_i, W[1:len(x_lt_i) + 1, :])
    h = self.parameters["nonlinearity"].get_numpy_f()(a * activation_rescaling[i])
    alpha = Utils.nnet.softmax(np.tanh(np.dot(h, V_alpha[i]) + b_alpha[i]) * 10.0)  # C
    Mu = np.dot(h, V_mu[i]) + b_mu[i]  # C
    Sigma = np.log(1.0 + np.exp((np.dot(h, V_sigma[i]) + b_sigma[i]) * 10)) / 10  # C

    def ld(x):
        lds = np.array([scipy.stats.norm.logpdf(x, Mu[c], Sigma[c]) for c in xrange(self.n_components)])
        return Utils.nnet.logsumexp(lds + np.log(alpha))
    return np.array([ld(x) for x in range])
def log_sum_exp(x, axis=1):
    m = T.max(x, axis=axis)
    return m + T.log(T.sum(T.exp(x - m.dimshuffle(0, 'x')), axis=axis))
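Note that the dimshuffle(0, 'x') above assumes a 2-D input reduced over axis=1. A quick NumPy check (not from the original file) of the identity log(sum(exp(x))) = m + log(sum(exp(x - m))), which is why the max is subtracted:
import numpy as np

x = np.array([[1000., 1001., 1002.]])           # naive exp() would overflow here
m = x.max(axis=1, keepdims=True)
stable = m.squeeze(1) + np.log(np.exp(x - m).sum(axis=1))
print(stable)                                    # ~1002.4076, no overflow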
def softmax_loss(p_true, output_before_softmax):
    output_before_softmax -= T.max(output_before_softmax, axis=1, keepdims=True)
    if p_true.ndim == 2:
        return T.mean(T.log(T.sum(T.exp(output_before_softmax), axis=1)) - T.sum(p_true * output_before_softmax, axis=1))
    else:
        return T.mean(T.log(T.sum(T.exp(output_before_softmax), axis=1)) - output_before_softmax[T.arange(p_true.shape[0]), p_true])
def GMM_nll(x, mus, sigmas, mix_weights):
    """
    D is dimension of each observation (e.g. frame_size) for each component
    (multivariate Normal with diagonal covariance matrix)
    See `gaussian_nll`
    x : (batch_size, D)
    mus : (batch_size, D, num_gaussians)
    sigmas : (batch_size, D, num_gaussians)
    mix_weights : (batch_size, num_gaussians)
    """
    x = x.dimshuffle(0, 1, 'x')
    # Similar to `gaussian_nll`
    ll_component_wise = lib.floatX(numpy.log(2. * numpy.pi))
    ll_component_wise += 2. * T.log(sigmas)
    ll_component_wise += ((x - mus) / sigmas) ** 2.
    ll_component_wise = ll_component_wise.sum(axis=1)  # on FRAME_SIZE
    ll_component_wise *= lib.floatX(-0.5)  # LL not NLL
    # Now ready to take care of weights of each component
    # Simply applying exp could potentially cause inf/NaN.
    # Look up LogSumExp trick, Softmax in theano, or this:
    # hips.seas.harvard.edu/blog/2013/01/09/computing-log-sum-exp/
    weighted_ll = ll_component_wise + T.log(mix_weights)
    ll_max = T.max(weighted_ll, axis=1, keepdims=True)
    nll = T.log(T.sum(T.exp(weighted_ll - ll_max), axis=1, keepdims=True))
    nll += ll_max
    nll = -nll.sum(axis=1)
    return nll
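A hedged NumPy reference for the same diagonal-Gaussian-mixture NLL, useful only for spot-checking the Theano graph above; the arguments follow the shapes documented in the docstring.
import numpy as np

def gmm_nll_np(x, mus, sigmas, mix_weights):
    x = x[:, :, None]                                        # (batch, D, 1)
    ll = -0.5 * (np.log(2. * np.pi) + 2. * np.log(sigmas)
                 + ((x - mus) / sigmas) ** 2).sum(axis=1)    # (batch, num_gaussians)
    weighted = ll + np.log(mix_weights)
    m = weighted.max(axis=1, keepdims=True)
    return -(m.squeeze(1) + np.log(np.exp(weighted - m).sum(axis=1)))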
def softmax(x):
    e_x = T.exp(x - x.max(axis=0, keepdims=True))
    out = e_x / e_x.sum(axis=0, keepdims=True)
    return out
def __init__(self, incoming, num_kernels, dim_per_kernel=5, theta=lasagne.init.Normal(0.05),
             log_weight_scale=lasagne.init.Constant(0.), b=lasagne.init.Constant(-1.), **kwargs):
    super(MinibatchLayer, self).__init__(incoming, **kwargs)
    self.num_kernels = num_kernels
    num_inputs = int(np.prod(self.input_shape[1:]))
    self.theta = self.add_param(theta, (num_inputs, num_kernels, dim_per_kernel), name="theta")
    self.log_weight_scale = self.add_param(log_weight_scale, (num_kernels, dim_per_kernel), name="log_weight_scale")
    self.W = self.theta * (T.exp(self.log_weight_scale) / T.sqrt(T.sum(T.square(self.theta), axis=0))).dimshuffle('x', 0, 1)
    self.b = self.add_param(b, (num_kernels,), name="b")
def get_output_for(self, input, init=False, **kwargs):
    if input.ndim > 2:
        # if the input has more than two dimensions, flatten it into a
        # batch of feature vectors.
        input = input.flatten(2)
    activation = T.tensordot(input, self.W, [[1], [0]])
    abs_dif = (T.sum(abs(activation.dimshuffle(0, 1, 2, 'x') - activation.dimshuffle('x', 1, 2, 0)), axis=2)
               + 1e6 * T.eye(input.shape[0]).dimshuffle(0, 'x', 1))
    if init:
        mean_min_abs_dif = 0.5 * T.mean(T.min(abs_dif, axis=2), axis=0)
        abs_dif /= mean_min_abs_dif.dimshuffle('x', 0, 'x')
        self.init_updates = [(self.log_weight_scale, self.log_weight_scale - T.log(mean_min_abs_dif).dimshuffle(0, 'x'))]
    f = T.sum(T.exp(-abs_dif), axis=2)
    if init:
        mf = T.mean(f, axis=0)
        f -= mf.dimshuffle('x', 0)
        self.init_updates.append((self.b, -mf))
    else:
        f += self.b.dimshuffle('x', 0)
    return T.concatenate([input, f], axis=1)
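A hedged usage sketch of the minibatch-discrimination layer whose methods appear above, placed inside a small lasagne discriminator; layer sizes and names are illustrative only.
import lasagne

l_in = lasagne.layers.InputLayer(shape=(None, 784))
l_h = lasagne.layers.DenseLayer(l_in, num_units=500)
l_mb = MinibatchLayer(l_h, num_kernels=100)        # appends per-example minibatch features
l_out = lasagne.layers.DenseLayer(l_mb, num_units=1,
                                  nonlinearity=lasagne.nonlinearities.sigmoid)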
def gaussian_kl_divergence(mean, ln_var):
    """Computes the KL-divergence of Gaussian variables from the standard one.

    Given two variable ``mean`` representing :math:`\\mu` and ``ln_var``
    representing :math:`\\log(\\sigma^2)`, this function returns a variable
    representing the KL-divergence between the given multi-dimensional Gaussian
    :math:`N(\\mu, S)` and the standard Gaussian :math:`N(0, I)`

    .. math::

        D_{\\mathbf{KL}}(N(\\mu, S) \\| N(0, I)),

    where :math:`S` is a diagonal matrix such that :math:`S_{ii} = \\sigma_i^2`
    and :math:`I` is an identity matrix.

    Args:
        mean (~chainer.Variable): A variable representing mean of given
            gaussian distribution, :math:`\\mu`.
        ln_var (~chainer.Variable): A variable representing logarithm of
            variance of given gaussian distribution, :math:`\\log(\\sigma^2)`.

    Returns:
        ~chainer.Variable: A variable representing KL-divergence between
            given gaussian distribution and the standard gaussian.
    """
    var = T.exp(ln_var)
    return 0.5 * T.sum(mean * mean + var - ln_var - 1, 1)
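A small check of the closed form above, assuming a single 1-D Gaussian with mean 1 and variance 4 (values chosen purely for illustration): KL = 0.5 * (mu^2 + var - ln(var) - 1).
import numpy as np

mu, var = 1.0, 4.0
kl = 0.5 * (mu**2 + var - np.log(var) - 1.0)
print(kl)  # ~1.3069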
def get_output_for(self, input, **kwargs):
    # Aggregate over axes 1 and 2: 1 - exp(-sum of softplus activations).
    rectified = nonlinearities.softplus(input)
    sum_rect = T.sum(rectified, axis=(1, 2))
    output = 1 - T.exp(-sum_rect)
    return output
def __init__(self, incoming, exp=nn.init.Constant(2.), **kwargs):
    super(AggSoPP, self).__init__(incoming, **kwargs)
    self.exp = self.add_param(exp, (1,), name='exp', regularizable=False)

def get_output_for(self, input, **kwargs):
    # Mean of sigmoid activations raised to a learned power.
    ps = nonlinearities.sigmoid(input)
    powd = ps ** self.exp
    tmean = T.mean(powd, axis=(1, 2))
    return tmean
def get_output_for(self, input, **kwargs):
    # Log-mean-exp pooling: a smooth maximum over self.axis, with sharpness set by self.r.
    return T.log(T.mean(T.exp(self.r * input), axis=self.axis) + 1e-7) / self.r
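A hedged NumPy sketch of the log-mean-exp pooling above, showing that as r grows the result approaches the plain max over the pooled axis; the values of r are illustrative, not tied to the original layer.
import numpy as np

def log_mean_exp(x, r, axis=1, eps=1e-7):
    return np.log(np.mean(np.exp(r * x), axis=axis) + eps) / r

x = np.array([[0.1, 0.5, 0.9]])
print(log_mean_exp(x, r=1.0))    # ~0.55, close to the mean
print(log_mean_exp(x, r=50.0))   # ~0.88, close to the max (0.9)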