def dist_info_sym(self, obs_var, latent_var=None):  # this is meant to be for one path!
    # NOTE: currently this is not doing anything, and it does not work for computing the dist_info_vars of npo_snn_rewardMI
if latent_var is None:
        latent_var1 = theano.shared(np.expand_dims(self.latent_fix, axis=0))  # fix to avoid passing the latent as an input: just use the fixed one
latent_var = TT.tile(latent_var1, [obs_var.shape[0], 1])
# generate the generalized input (append latents to obs.)
if self.bilinear_integration:
        extended_obs_var = TT.concatenate(
            [obs_var, latent_var,
             TT.flatten(obs_var[:, :, np.newaxis] * latent_var[:, np.newaxis, :], outdim=2)],
            axis=1)
else:
extended_obs_var = TT.concatenate([obs_var, latent_var], axis=1)
mean_var, log_std_var = L.get_output([self._l_mean, self._l_log_std], extended_obs_var)
if self.min_std is not None:
log_std_var = TT.maximum(log_std_var, np.log(self.min_std))
return dict(mean=mean_var, log_std=log_std_var)
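# A minimal, self-contained sketch (not part of the policy class above) of the
# bilinear integration step used there: the outer product of each observation
# row with its latent row is flattened and appended, so the extended input has
# dim_obs + dim_latent + dim_obs * dim_latent columns.
import numpy as np
import theano
import theano.tensor as TT

obs_var = TT.matrix('obs')        # (batch, dim_obs)
latent_var = TT.matrix('latent')  # (batch, dim_latent)
bilinear = TT.flatten(obs_var[:, :, np.newaxis] * latent_var[:, np.newaxis, :], outdim=2)
extended_obs_var = TT.concatenate([obs_var, latent_var, bilinear], axis=1)
f_extend = theano.function([obs_var, latent_var], extended_obs_var)
print(f_extend(np.ones((2, 3), dtype=theano.config.floatX),
               np.ones((2, 2), dtype=theano.config.floatX)).shape)  # (2, 11)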
def step_gibbs(self, r_h, r_v, h, *params):
'''Step Gibbs sample.
Args:
r_h (theano.randomstream): random variables for hiddens.
r_v (theano.randomstream): random variables for visibles.
h (T.tensor): hidden state.
*params: theano shared variables.
Returns:
T.tensor: hidden samples.
T.tensor: visible samples.
T.tensor: conditional hidden probability.
T.tensor: conditional visible probability.
'''
v, pv = self.step_sv_h(r_v, h, *params)
h, ph = self.step_sh_v(r_h, v, *params)
return h, v, ph, pv
def step_free_energy(self, x, beta, *params):
'''Step free energy function.
Args:
x (T.tensor): data sample.
beta (float): beta value for annealing.
*params: theano shared variables.
Returns:
T.tensor: free energy.
'''
W, v_params, h_params = self.split_params(*params)
vis_term = beta * self.v_dist.get_energy_bias(x, *v_params)
x = self.v_dist.scale_for_energy_model(x, *v_params)
hid_act = beta * (T.dot(x, W) + self.h_dist.get_center(*h_params))
fe = -vis_term - T.log(1. + T.exp(hid_act)).sum(axis=1)
return fe
def step_free_energy_h(self, h, beta, *params):
'''Step free energy function for hidden states.
Args:
h (T.tensor): hidden sample.
beta (float): beta value for annealing.
*params: theano shared variables.
Returns:
T.tensor: free energy.
'''
W, v_params, h_params = self.split_params(*params)
hid_term = beta * self.h_dist.get_energy_bias(h, *h_params)
h = self.h_dist.scale_for_energy_model(h, *h_params)
vis_act = beta * (T.dot(h, W.T) + self.v_dist.get_center(*v_params))
fe = -hid_term - T.log(1. + T.exp(vis_act)).sum(axis=1)
return fe
def _step(self, m, y, h_, Ur):
'''Step function for RNN call.
Args:
m (T.tensor): masks.
y (T.tensor): inputs.
h_ (T.tensor): recurrent state.
Ur (theano.shared): recurrent connection.
Returns:
T.tensor: next recurrent state.
'''
preact = T.dot(h_, Ur) + y
h = T.tanh(preact)
h = m * h + (1 - m) * h_
return h
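# Hedged sketch (shapes and names here are assumptions, not from the original
# module) of how a step function like _step above is usually driven by
# theano.scan: mask and input are the scanned sequences, h_ is the carried
# state, and the recurrent weight Ur is passed as a non-sequence.
import numpy as np
import theano
import theano.tensor as T

dim, batch, n_steps = 4, 2, 5
mask = T.matrix('mask')   # (n_steps, batch)
inp = T.tensor3('inp')    # (n_steps, batch, dim)
h0 = T.matrix('h0')       # (batch, dim)
Ur = theano.shared(np.eye(dim).astype(theano.config.floatX), name='Ur')

def step(m, y, h_, Ur):
    h = T.tanh(T.dot(h_, Ur) + y)
    return m[:, None] * h + (1. - m[:, None]) * h_

hs, _ = theano.scan(step, sequences=[mask, inp], outputs_info=[h0], non_sequences=[Ur])
f_rnn = theano.function([mask, inp, h0], hs)
out = f_rnn(np.ones((n_steps, batch), dtype=theano.config.floatX),
            np.zeros((n_steps, batch, dim), dtype=theano.config.floatX),
            np.zeros((batch, dim), dtype=theano.config.floatX))
print(out.shape)  # (5, 2, 4)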
def sgd(lr, tparams, grads, inp, cost, extra_ups=[], extra_outs=[],
exclude_params=set([])):
    '''Stochastic gradient descent.

    Returns `f_grad_shared`, which computes the cost and caches the gradients in
    shared variables, and `f_update`, which applies them with learning rate `lr`.
    '''
gshared = [theano.shared(p.get_value() * 0., name='%s_grad'%k)
for k, p in tparams.iteritems()]
gsup = [(gs, g) for gs, g in zip(gshared, grads)]
f_grad_shared = theano.function(
inp, [cost]+extra_outs, updates=gsup+extra_ups, profile=profile)
pup = [(p, p - lr * g) for p, g in zip(tools.itemlist(tparams), gshared)
if p.name not in exclude_params]
if not isinstance(lr, list): lr = [lr]
f_update = theano.function(lr, [], updates=pup, profile=profile)
return f_grad_shared, f_update
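# Hedged usage sketch of sgd() above on a toy linear model. It assumes the
# snippet's own module-level dependencies are available (`tools.itemlist`,
# which returns the values of tparams, and the `profile` flag), and that it
# runs under Python 2 because of `iteritems()`.
from collections import OrderedDict
import numpy as np
import theano
import theano.tensor as T

x = T.matrix('x')
y = T.vector('y')
w = theano.shared(np.zeros(3, dtype=theano.config.floatX), name='w')
tparams = OrderedDict([('w', w)])
cost = ((T.dot(x, w) - y) ** 2).mean()
grads = T.grad(cost, list(tparams.values()))
lr = T.scalar('lr')
f_grad_shared, f_update = sgd(lr, tparams, grads, [x, y], cost)

data_x = np.random.randn(8, 3).astype(theano.config.floatX)
data_y = np.random.randn(8).astype(theano.config.floatX)
print(f_grad_shared(data_x, data_y))  # cost; gradients cached in the w_grad shared variable
f_update(0.1)                         # apply one SGD step to w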
def _generate_train_model_function(self, scores):
u = T.lvector('u')
i = T.lvector('i')
j = T.lvector('j')
    self.W = theano.shared(numpy.zeros(self._dim).astype('float32'), name='W')
    self.S = theano.shared(scores, name='S')
    x_ui = T.dot(self.W, self.S[u, i, :].T)
    x_uj = T.dot(self.W, self.S[u, j, :].T)
x_uij = x_ui - x_uj;
obj = T.sum(
T.log(T.nnet.sigmoid(x_uij)).sum() - \
self._lambda_w * 0.5 * (self.W ** 2).sum()
)
cost = -obj
g_cost_W = T.grad(cost=cost, wrt=self.W)
updates = [
(self.W, self.W - self._learning_rate * g_cost_W)
]
    self.train_model = theano.function(inputs=[u, i, j], outputs=cost, updates=updates)
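# Self-contained sketch of the same BPR-style objective with toy shapes
# (the dimensions and hyper-parameter values below are assumptions, not taken
# from the class above): W weights a per-(user, item) score vector S[u, i, :],
# and the loss pushes positive-item scores above negative-item scores.
import numpy
import theano
import theano.tensor as T

dim, n_users, n_items = 5, 4, 6
W = theano.shared(numpy.zeros(dim).astype('float32'), name='W')
S = theano.shared(numpy.random.rand(n_users, n_items, dim).astype('float32'), name='S')
u, i, j = T.lvector('u'), T.lvector('i'), T.lvector('j')
x_uij = T.dot(W, S[u, i, :].T) - T.dot(W, S[u, j, :].T)
cost = -(T.log(T.nnet.sigmoid(x_uij)).sum() - 0.01 * 0.5 * (W ** 2).sum())
train = theano.function([u, i, j], cost,
                        updates=[(W, W - 0.05 * T.grad(cost, W))])
print(train(numpy.array([0, 1], dtype='int64'),
            numpy.array([2, 3], dtype='int64'),
            numpy.array([4, 5], dtype='int64')))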
def __call__(self, params, cost):
    '''Adam update rule (with the extra decay `self.l` applied to b1); the moment
    accumulators m, v and the timestep t are kept in theano.shared variables.'''
    updates = []
grads = T.grad(cost, params)
grads = clip_norms(grads, self.clipnorm)
t = theano.shared(floatX(1.))
b1_t = self.b1*self.l**(t-1)
for p, g in zip(params, grads):
g = self.regularizer.gradient_regularize(p, g)
m = theano.shared(p.get_value() * 0.)
v = theano.shared(p.get_value() * 0.)
m_t = b1_t*m + (1 - b1_t)*g
v_t = self.b2*v + (1 - self.b2)*g**2
m_c = m_t / (1-self.b1**t)
v_c = v_t / (1-self.b2**t)
p_t = p - (self.lr * m_c) / (T.sqrt(v_c) + self.e)
p_t = self.regularizer.weight_regularize(p_t)
updates.append((m, m_t))
updates.append((v, v_t))
        updates.append((p, p_t))
updates.append((t, t + 1.))
return updates
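# Minimal self-contained sketch (not the class above) of the same pattern:
# plain Adam with per-parameter moment accumulators kept in theano.shared
# variables. The hyper-parameter values below are the usual defaults, chosen
# only for the example; float64 is used throughout to keep dtypes simple.
import numpy as np
import theano
import theano.tensor as T

x = T.dvector('x')
p = theano.shared(np.ones(3), name='p')       # parameter to optimize
cost = ((p - x) ** 2).sum()
g = T.grad(cost, p)

lr, b1, b2, eps = 0.001, 0.9, 0.999, 1e-8
t = theano.shared(np.asarray(0.))             # timestep
m = theano.shared(p.get_value() * 0.)         # first moment
v = theano.shared(p.get_value() * 0.)         # second moment
t_new = t + 1.
m_new = b1 * m + (1. - b1) * g
v_new = b2 * v + (1. - b2) * g ** 2
m_hat = m_new / (1. - b1 ** t_new)            # bias-corrected moments
v_hat = v_new / (1. - b2 ** t_new)
step = theano.function([x], cost, updates=[(t, t_new), (m, m_new), (v, v_new),
                                           (p, p - lr * m_hat / (T.sqrt(v_hat) + eps))])
print(step(np.zeros(3)))                      # cost before the update is applied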
def metropolis_hastings_accept(energy_prev, energy_next, s_rng):
"""
Performs a Metropolis-Hastings accept-reject move.
Parameters
----------
energy_prev: theano vector
Symbolic theano tensor which contains the energy associated with the
configuration at time-step t.
energy_next: theano vector
Symbolic theano tensor which contains the energy associated with the
proposed configuration at time-step t+1.
s_rng: theano.tensor.shared_randomstreams.RandomStreams
Theano shared random stream object used to generate the random number
used in proposal.
Returns
-------
    return: theano boolean tensor
        Symbolic tensor with one entry per chain: True where the move is
        accepted, False otherwise
"""
ediff = energy_prev - energy_next
return (TT.exp(ediff) - s_rng.uniform(size=energy_prev.shape)) >= 0
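# Runnable sketch of the accept rule above with toy energies: a proposed move
# is accepted exactly when uniform(0, 1) < exp(energy_prev - energy_next),
# evaluated independently for each chain in the batch.
import numpy as np
import theano
import theano.tensor as TT
from theano.tensor.shared_randomstreams import RandomStreams

s_rng = RandomStreams(seed=123)
energy_prev = TT.vector('energy_prev')
energy_next = TT.vector('energy_next')
accept = (TT.exp(energy_prev - energy_next) - s_rng.uniform(size=energy_prev.shape)) >= 0
f_accept = theano.function([energy_prev, energy_next], accept)
print(f_accept(np.zeros(4, dtype=theano.config.floatX),
               np.ones(4, dtype=theano.config.floatX)))  # each True with probability exp(-1)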
def draw(self, **kwargs):
"""
Returns a new position obtained after `n_steps` of HMC simulation.
Parameters
----------
kwargs: dictionary
The `kwargs` dictionary is passed to the shared variable
(self.positions) `get_value()` function. For example, to avoid
copying the shared variable value, consider passing `borrow=True`.
Returns
-------
rval: numpy matrix
        Numpy matrix whose dimensions are similar to `initial_position`.
"""
self.simulate()
    return self.positions.get_value(**kwargs)
def _init_params(self):
self.W_hhs = []
self.b_hhs = []
for dx in xrange(self.n_layers):
W_hh = self.init_fn[dx](self.n_hids[(dx-1)%self.n_layers],
self.n_hids[dx],
self.sparsity[dx],
self.scale[dx],
rng=self.rng)
self.W_hhs.append(theano.shared(value=W_hh, name="W%d_%s" %
(dx,self.name)))
if dx > 0:
self.b_hhs.append(theano.shared(
self.bias_fn[dx-1](self.n_hids[dx],
self.bias_scale[dx-1],
self.rng),
name='b%d_%s' %(dx, self.name)))
self.params = [x for x in self.W_hhs] + [x for x in self.b_hhs]
self.params_grad_scale = [self.grad_scale for x in self.params]
if self.weight_noise:
self.nW_hhs = [theano.shared(x.get_value()*0, name='noise_'+x.name) for x in self.W_hhs]
self.nb_hhs = [theano.shared(x.get_value()*0, name='noise_'+x.name) for x in self.b_hhs]
self.noise_params = [x for x in self.nW_hhs] + [x for x in self.nb_hhs]
self.noise_params_shape_fn = [constant_shape(x.get_value().shape)
for x in self.noise_params]
def _init_params(self):
self.iBlocks = 1 # number of blocks in the input (from lower layer)
W_em = self.init_fn(self.n_in,
self.n_class,
self.sparsity,
self.scale,
self.rng)
self.W_em = theano.shared(W_em,
name='W_%s' % self.name)
self.b_em = theano.shared(
self.bias_fn(self.n_class, self.bias_scale, self.rng),
name='b_%s' % self.name)
U_em = theano.shared(((self.rng.rand(self.iBlocks, self.n_class,
self.n_in, self.n_words_class)-0.5)/(self.n_words_class*self.n_in)
).astype(theano.config.floatX), name='U_%s'%self.name)
self.U_em = U_em
c_em = numpy.zeros((self.n_class, self.n_words_class), dtype='float32')
n_words_last_class = self.n_out % self.n_words_class
#c_em[-1, n_words_last_class:] = -numpy.inf
self.c_em = theano.shared(c_em, name='c_%s' % self.name)
self.params = [self.W_em, self.b_em, self.U_em, self.c_em]
self.params_grad_scale = [self.grad_scale for x in self.params]
def __init__(self, rng, std = 0.1, ndim=0, avg =0, shape_fn=None):
"""
"""
assert rng is not None, "random number generator should not be empty!"
super(GaussianNoise, self).__init__(0, 0, rng)
self.std = scale
self.avg = self.avg
self.ndim = ndim
self.shape_fn = shape_fn
if self.shape_fn:
# Name is not important as it is not a parameter of the model
self.noise_term = theano.shared(numpy.zeros((2,)*ndim,
dtype=theano.config.floatX),
name='ndata')
self.noise_params += [self.noise_term]
self.noise_params_shape_fn += [shape_fn]
self.trng = RandomStreams(rng.randint(1e5))
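# Self-contained sketch (not the class above) of the noise pattern it sets up:
# Gaussian noise drawn from a RandomStreams object with a fixed avg/std is
# added to a shared "clean" value on every call.
import numpy
import theano
from theano.tensor.shared_randomstreams import RandomStreams

trng = RandomStreams(12345)
clean = theano.shared(numpy.zeros((2, 3), dtype=theano.config.floatX), name='clean')
noisy = clean + trng.normal(size=(2, 3), avg=0., std=0.1, dtype=theano.config.floatX)
f_noisy = theano.function([], noisy)
print(f_noisy())  # a different Gaussian perturbation on every call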
def fit(self, x):
s = x.shape
x = x.copy().reshape((s[0],np.prod(s[1:])))
m = np.mean(x, axis=0)
x -= m
sigma = np.dot(x.T,x) / x.shape[0]
U, S, V = linalg.svd(sigma)
tmp = np.dot(U, np.diag(1./np.sqrt(S+self.regularization)))
tmp2 = np.dot(U, np.diag(np.sqrt(S+self.regularization)))
self.ZCA_mat = th.shared(np.dot(tmp, U.T).astype(th.config.floatX))
self.inv_ZCA_mat = th.shared(np.dot(tmp2, U.T).astype(th.config.floatX))
self.mean = th.shared(m.astype(th.config.floatX))
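# Hedged, self-contained sketch of how the shared matrices built by fit() are
# typically used afterwards: whiten by subtracting the stored mean and
# multiplying by ZCA_mat (inv_ZCA_mat undoes it). Here the matrices are rebuilt
# inline on random data rather than taken from an instance of the class above,
# and `eps` stands in for the class's regularization constant.
import numpy as np
from scipy import linalg
import theano as th
import theano.tensor as T

x = np.random.rand(100, 8).astype(th.config.floatX)
m = x.mean(axis=0)
sigma = np.dot((x - m).T, x - m) / x.shape[0]
U, S, V = linalg.svd(sigma)
eps = 1e-5  # assumed regularization constant
ZCA_mat = th.shared(np.dot(np.dot(U, np.diag(1. / np.sqrt(S + eps))), U.T).astype(th.config.floatX))
mean = th.shared(m.astype(th.config.floatX))

x_sym = T.matrix('x')
whiten = th.function([x_sym], T.dot(x_sym - mean, ZCA_mat))
print(np.cov(whiten(x), rowvar=False).round(2))  # approximately the identity matrix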
def compile(
self,s_inputs_, s_loss_, v_params_, s_grads_=None, s_reg_=0,
fetches_=None, updates_=None, givens_=None,
trunc_grad_=None, profile_=False
):
'''
compile optimizer against specific model
Args:
s_inputs_: list of symbolic input tensors, including label
s_loss_: optimization loss, symbolic scalar
v_params_: list of shared parameters to optimize
        s_grads_: list of gradients to apply, must be in the same order as v_params_; default None (use autodiff)
        s_reg_: symbolic regularization term, default 0 (no regularization)
        updates_: update operations for shared values after a step of optimization,
            usually RNN states; takes the form [(v_var, s_new_var), ...]
        trunc_grad_: if a float, gradients are clipped elementwise to [-trunc_grad_, trunc_grad_]
    Returns: None
'''
self.s_loss = s_loss_
self.s_reg = s_reg_
if s_grads_ is None:
s_grads_ = T.grad(
self.s_loss + self.s_reg, list(v_params_), disconnected_inputs='warn')
    if isinstance(trunc_grad_, float):
self.s_grads = [T.clip(g,-trunc_grad_,trunc_grad_) for g in s_grads_]
else:
self.s_grads = s_grads_
def init_tparams(params):
tparams = OrderedDict()
for kk, pp in params.iteritems():
tparams[kk] = theano.shared(params[kk], name=kk, borrow=True)
add_role(tparams[kk], PARAMETER)
return tparams
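# Hedged usage sketch of init_tparams above (Python 2 because of `iteritems()`,
# and `add_role` / `PARAMETER` are assumed importable as in the snippet's own
# module): plain numpy arrays go in, named theano.shared parameters come out.
from collections import OrderedDict
import numpy
import theano

params = OrderedDict()
params['W'] = numpy.random.randn(3, 4).astype(theano.config.floatX)
params['b'] = numpy.zeros(4, dtype=theano.config.floatX)
tparams = init_tparams(params)
print(tparams['W'].get_value().shape)  # (3, 4)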
# make prefix-appended name