def gelu(x):
    return 0.5 * x * (1 + T.tanh(T.sqrt(2 / np.pi) * (x + 0.044715 * T.pow(x, 3))))
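A quick sanity check of this tanh approximation (a minimal sketch, assuming Theano is available and imported as `theano`/`T` and NumPy as `np`): compile it and compare against the exact erf-based GELU.

import numpy as np
import theano
import theano.tensor as T

x = T.vector('x')
gelu_approx = theano.function([x], gelu(x))
gelu_exact = theano.function([x], 0.5 * x * (1.0 + T.erf(x / T.sqrt(2.0))))

v = np.linspace(-3, 3, 7).astype(theano.config.floatX)
# the two outputs agree to roughly 1e-3 absolute error
print(gelu_approx(v))
print(gelu_exact(v))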
def lyr_lstm(
        self, name_,
        s_x_, s_cell_, s_hid_,
        idim_, hdim_,
        axis_=-1,
        lyr_linear_=None,
        op_act_=T.tanh,
        op_gate_=T.nnet.sigmoid):
    s_inp = T.join(axis_, s_x_, s_hid_)
    if lyr_linear_ is None:
        lyr_linear_ = self.lyr_linear
    s_gates_lin, s_inp_lin = T.split(
        lyr_linear_(name_ + '_rec', s_inp, idim_ + hdim_, hdim_ * 4),
        [hdim_ * 3, hdim_], 2, axis=axis_)
    s_igate, s_fgate, s_ogate = T.split(op_gate_(s_gates_lin), [hdim_] * 3, 3, axis=axis_)
    s_cell_tp1 = s_igate * op_act_(s_inp_lin) + s_fgate * s_cell_
    s_hid_tp1 = op_act_(s_cell_tp1) * s_ogate
    return s_cell_tp1, s_hid_tp1
def __VanillaRNNstep(
        name,
        input_dim,
        hidden_dim,
        current_inp,
        last_hidden,
        weightnorm=True):
    """
    CAUTION:
        Not for stand-alone usage. It is defined here (instead of
        inside the VanillaRNN function) so as not to clutter the code.
    :todo:
        - Implement!
        - Test!
    """
    # S_t = tanh(U*X_t + W*S_{t-1})
    raise NotImplementedError
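Since the body above is still a TODO, here is a hedged sketch of the step described in the comment; the helper name, the shapes, and the shared variables `U` and `W` are all assumptions, and weight normalization is ignored.

import numpy as np
import theano
import theano.tensor as T

def vanilla_rnn_step_sketch(U, W, current_inp, last_hidden):
    # S_t = tanh(U*X_t + W*S_{t-1}); U, W are assumed Theano shared matrices
    return T.tanh(T.dot(current_inp, U) + T.dot(last_hidden, W))

# example wiring (all sizes are made up for illustration)
input_dim, hidden_dim = 16, 32
U = theano.shared(np.zeros((input_dim, hidden_dim), dtype=theano.config.floatX), name='U')
W = theano.shared(np.zeros((hidden_dim, hidden_dim), dtype=theano.config.floatX), name='W')
x_t = T.matrix('x_t')        # (batch, input_dim)
s_tm1 = T.matrix('s_tm1')    # (batch, hidden_dim)
s_t = vanilla_rnn_step_sketch(U, W, x_t, s_tm1)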
def _step(self, x_, m_, h_, c_):
    preact = tensor.dot(h_, self.U) + x_
    i = tensor.nnet.sigmoid(_slice(preact, 0, self.hidden_dim))
    f = tensor.nnet.sigmoid(_slice(preact, 1, self.hidden_dim) + self.forget_bias)
    o = tensor.nnet.sigmoid(_slice(preact, 2, self.hidden_dim))
    j = tensor.tanh(_slice(preact, 3, self.hidden_dim))
    c = f * c_ + i * j
    c = m_[:, None] * c + (1. - m_)[:, None] * c_
    h = o * tensor.tanh(c)
    if self.recurrent_dropout_layer is not None:
        h = self.recurrent_dropout_layer.connect(h, self.is_train)
    h = m_[:, None] * h + (1. - m_)[:, None] * h_
    return h, c
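These `_step` functions rely on a `_slice` helper that is not shown in this collection; a common definition (an assumption here, not taken from the source) picks the n-th block of width `dim` along the last axis.

def _slice(x, n, dim):
    # n-th contiguous block of size `dim` along the last axis
    if x.ndim == 3:
        return x[:, :, n * dim:(n + 1) * dim]
    return x[:, n * dim:(n + 1) * dim]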
def _step(self, x_, m_, h_, c_):
    preact = tensor.dot(h_, self.U) + _slice(x_, 0, self.hidden_dim * 5)
    # i: input. f: forget. o: output. t: transform.
    # j: input w/ non-linearity. k: input w/o non-linearity.
    i = tensor.nnet.sigmoid(_slice(preact, 0, self.hidden_dim))
    f = tensor.nnet.sigmoid(_slice(preact, 1, self.hidden_dim) + self.forget_bias)
    o = tensor.nnet.sigmoid(_slice(preact, 2, self.hidden_dim))
    t = tensor.nnet.sigmoid(_slice(preact, 3, self.hidden_dim))
    j = tensor.tanh(_slice(preact, 4, self.hidden_dim))
    k = _slice(x_, 5, self.hidden_dim)
    c = f * c_ + i * j
    c = m_[:, None] * c + (1. - m_)[:, None] * c_
    h = t * o * tensor.tanh(c) + (1. - t) * k
    if self.recurrent_dropout_layer is not None:
        h = self.recurrent_dropout_layer.connect(h, self.is_train)
    h = m_[:, None] * h + (1. - m_)[:, None] * h_
    return h, c
def _step(self, x_, px_, m_, h_, c_):
    preact = tensor.dot(h_, self.U) + px_
    # i: input. f: forget. o: output. t: transform.
    # j: input w/ non-linearity. k: input w/o non-linearity.
    i = tensor.nnet.sigmoid(_slice(preact, 0, self.hidden_dim))
    f = tensor.nnet.sigmoid(_slice(preact, 1, self.hidden_dim) + self.forget_bias)
    o = tensor.nnet.sigmoid(_slice(preact, 2, self.hidden_dim))
    t = tensor.nnet.sigmoid(_slice(preact, 3, self.hidden_dim))
    j = tensor.tanh(_slice(preact, 4, self.hidden_dim))
    c = f * c_ + i * j
    c = m_[:, None] * c + (1. - m_)[:, None] * c_
    h = t * o * tensor.tanh(c) + (1. - t) * x_
    if self.recurrent_dropout_layer is not None:
        h = self.recurrent_dropout_layer.connect(h, self.is_train)
    h = m_[:, None] * h + (1. - m_)[:, None] * h_
    return h, c
def _step(self, x_, px_, m_, h_, c_):
    preact = tensor.dot(h_, self.U) + px_
    i = tensor.nnet.sigmoid(_slice(preact, 0, self.hidden_dim))
    f = tensor.nnet.sigmoid(_slice(preact, 1, self.hidden_dim) + self.forget_bias)
    o = tensor.nnet.sigmoid(_slice(preact, 2, self.hidden_dim))
    j = tensor.tanh(_slice(preact, 3, self.hidden_dim))
    c = f * c_ + i * j
    c = m_[:, None] * c + (1. - m_)[:, None] * c_
    # Residual connection.
    h = o * tensor.tanh(c) + x_
    if self.recurrent_dropout_layer is not None:
        h = self.recurrent_dropout_layer.connect(h, self.is_train)
    h = m_[:, None] * h + (1. - m_)[:, None] * h_
    return h, c
def __init__(self, inputDim=None, nFilters=None, filterDim=None, activation=T.tanh,
             filter_shape=None, image_shape=None, outputDim=None, stride=(1, 1), border_mode='valid'):
    """
    :type filter_shape: tuple or list of length 4
    :param filter_shape: (number of filters, num inputVar feature maps, filter height, filter width)
    :type image_shape: tuple or list of length 4
    :param image_shape: (batch size, num inputVar feature maps, image height, image width)
    :type stride: tuple or list of length 2
    :param stride: the convolution stride (#rows, #cols)
    """
    super(ConvLayerParams, self).__init__(inputDim, outputDim)
    self._nFilters = nFilters
    self._filterDim = filterDim
    self._filter_shape = filter_shape
    self._image_shape = image_shape
    self._activation = activation
    self._stride = stride
    self._border_mode = border_mode
    self.update()
def __init__(self, inputDim=None, nFilters=None, filterDim=None, activation=T.tanh, poolsize=(1, 1), poolType=0,
             filter_shape=None, image_shape=None, outputDim=None, stride=(1, 1), border_mode='valid'):
    """
    :type filter_shape: tuple or list of length 4
    :param filter_shape: (number of filters, num inputVar feature maps, filter height, filter width)
    :type image_shape: tuple or list of length 4
    :param image_shape: (batch size, num inputVar feature maps, image height, image width)
    :type poolsize: tuple or list of length 2
    :param poolsize: the downsampling (pooling) factor (#rows, #cols)
    """
    super(ConvPoolLayerParams, self).__init__(inputDim, outputDim)
    self._nFilters = nFilters
    self._filterDim = filterDim
    self._poolsize = poolsize
    self._poolType = poolType
    self._filter_shape = filter_shape
    self._image_shape = image_shape
    self._activation = activation
    self._stride = stride
    self._border_mode = border_mode
    self.update()
def lyr_gru_flat(
        self, name_,
        s_x_, s_state_,
        idim_, hdim_,
        axis_=-1,
        lyr_linear_=None,
        op_act_=T.tanh,
        op_gate_=T.nnet.sigmoid,
        params_group_='params'
):
    '''
    GRU layer, flat version.
    To use it, you need to provide the state variable yourself.
    '''
    if lyr_linear_ is None:
        lyr_linear_ = self.lyr_linear
    s_igate = lyr_linear_(name_ + '_igate', idim_ + hdim_, idim_, params_group_=params_group_)
    s_inp_gated = T.join(axis_, s_x_ * op_gate_(s_igate), s_state_)
    s_gate_lin, s_state_tp1_lin = T.split(
        lyr_linear_(name_ + '_gate', s_inp_gated, idim_ + hdim_, hdim_ * 2),
        [hdim_, hdim_], 2, axis_)
    s_gate = op_gate_(s_gate_lin)
    return s_state_ * s_gate + op_act_(s_state_tp1_lin) * (1. - s_gate)
def gru_layer(tparams, emb, options):
    hiddenDimSize = options['hiddenDimSize']
    timesteps = emb.shape[0]
    if emb.ndim == 3:
        n_samples = emb.shape[1]
    else:
        n_samples = 1

    def stepFn(wx, h, U_gru):
        uh = T.dot(h, U_gru)
        r = T.nnet.sigmoid(_slice(wx, 0, hiddenDimSize) + _slice(uh, 0, hiddenDimSize))
        z = T.nnet.sigmoid(_slice(wx, 1, hiddenDimSize) + _slice(uh, 1, hiddenDimSize))
        h_tilde = T.tanh(_slice(wx, 2, hiddenDimSize) + r * _slice(uh, 2, hiddenDimSize))
        h_new = z * h + ((1. - z) * h_tilde)
        return h_new

    Wx = T.dot(emb, tparams['W_gru']) + tparams['b_gru']
    results, updates = theano.scan(
        fn=stepFn,
        sequences=[Wx],
        outputs_info=T.alloc(numpy_floatX(0.0), n_samples, hiddenDimSize),
        non_sequences=[tparams['U_gru']],
        name='gru_layer',
        n_steps=timesteps)
    return results
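`numpy_floatX`, used to build the initial state above, is another helper that does not appear in these snippets; a common definition (again an assumption) casts a scalar to Theano's configured float type.

import numpy
import theano

def numpy_floatX(data):
    # cast a Python/NumPy scalar or array to theano.config.floatX
    return numpy.asarray(data, dtype=theano.config.floatX)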
def one_step(self, x, h_tm1, s_tm1):
"""
Run the forward pass for a single timestep of a LSTM
h_tm1: initial h
s_tm1: initial s (cell state)
"""
g = T.tanh(T.dot(x, self.W_gx) + T.dot(h_tm1, self.W_gh) + self.b_g)
i = T.nnet.sigmoid(T.dot(x, self.W_ix) + T.dot(h_tm1, self.W_ih) + self.b_i)
f = T.nnet.sigmoid(T.dot(x, self.W_fx) + T.dot(h_tm1, self.W_fh) + self.b_f)
o = T.nnet.sigmoid(T.dot(x, self.W_ox) + T.dot(h_tm1, self.W_oh) + self.b_o)
s = i * g + s_tm1 * f
h = T.tanh(s) * o
return h, s
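A minimal sketch of how a step function with this `(x, h_tm1, s_tm1) -> (h, s)` contract is driven by `theano.scan`; the stand-in `step`, the sizes, and the variable names are assumptions, and in real use the bound method `one_step` above would be passed as the scan function instead.

import numpy as np
import theano
import theano.tensor as T

n_in, n_hidden = 8, 16        # assumed sizes
W = theano.shared(np.zeros((n_in, n_hidden), dtype=theano.config.floatX), name='W')

def step(x, h_tm1, s_tm1):
    # stand-in with the same signature/contract as one_step above
    s = s_tm1 + T.tanh(T.dot(x, W))
    h = T.tanh(s)
    return h, s

x_seq = T.tensor3('x_seq')    # (timesteps, batch, n_in)
h0 = T.zeros((x_seq.shape[1], n_hidden))
s0 = T.zeros((x_seq.shape[1], n_hidden))
(h_seq, s_seq), _ = theano.scan(step, sequences=x_seq, outputs_info=[h0, s0])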
def set_net_params(self):
    '''Returns MLP parameters for scan.'''
    super(GRU, self).set_net_params()

    if self.input_net_aux is None:
        self.input_net_aux = MLP(
            self.dim_in, 2 * self.dim_h, 2 * self.dim_hs[0], 1,
            rng=self.rng, trng=self.trng,
            h_act='T.nnet.sigmoid', out_act='T.tanh',
            name='input_net_aux')
    else:
        assert self.input_net_aux.dim_in == self.dim_in
        assert self.input_net_aux.dim_out == 2 * self.dim_hs[0]
    self.input_net_aux.name = self.name + '_input_net_aux'
    self.nets.append(self.input_net_aux)

    for i in xrange(self.n_layers - 1):
        n = MLP(self.dim_hs[i], 2 * self.dim_hs[i+1],
                rng=self.rng, trng=self.trng,
                distribution='centered_binomial',
                name='rnn_net_aux%d' % i)
        self.inter_nets.append(n)  # insert(2 * i + 1, n)
def _step(self, m, y, h_, Ur):
    '''Step function for RNN call.

    Args:
        m (T.tensor): masks.
        y (T.tensor): inputs.
        h_ (T.tensor): recurrent state.
        Ur (theano.shared): recurrent connection.

    Returns:
        T.tensor: next recurrent state.

    '''
    preact = T.dot(h_, Ur) + y
    h = T.tanh(preact)
    h = m * h + (1 - m) * h_
    return h
def fullyconnected_layer(tparams, state_below, options, prefix, activ='lambda x: x', **kwargs):
"""
compute the forward pass for a fully connected layer
Parameters
----------
tparams : OrderedDict of theano shared variables, {parameter name: value}
state_below : theano 3d tensor, input data, dimensions: (num of time steps, batch size, dim of vector)
options : dictionary, {hyperparameter: value}
prefix : string, layer name
activ : string, activation function: 'liner', 'tanh', or 'rectifier'
Returns
-------
: theano 3d tensor, output data, dimensions: (num of time steps, batch size, dim of vector)
"""
return eval(activ)(tensor.dot(state_below, tparams[p_name(prefix, 'W')]) + tparams[p_name(prefix, 'b')])
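The `activ` argument in this and the next two layers is a Python expression that is `eval`'d into a callable. A small, self-contained illustration of that convention (the helper name here is mine, not from the source):

import theano.tensor as tensor

def apply_activ_string(activ, x):
    # activ is a string such as 'lambda x: x' or 'lambda x: tensor.tanh(x)'
    return eval(activ)(x)

y = apply_activ_string('lambda x: tensor.tanh(x)', tensor.matrix('x'))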
def gate_layer(tparams, X_word, X_char, options, prefix, pretrain_mode, activ='lambda x: x', **kwargs):
"""
compute the forward pass for a gate layer
Parameters
----------
tparams : OrderedDict of theano shared variables, {parameter name: value}
X_word : theano 3d tensor, word input, dimensions: (num of time steps, batch size, dim of vector)
X_char : theano 3d tensor, char input, dimensions: (num of time steps, batch size, dim of vector)
options : dictionary, {hyperparameter: value}
prefix : string, layer name
pretrain_mode : theano shared scalar, 0. = word only, 1. = char only, 2. = word & char
activ : string, activation function: 'liner', 'tanh', or 'rectifier'
Returns
-------
X : theano 3d tensor, final vector, dimensions: (num of time steps, batch size, dim of vector)
"""
# compute gating values, Eq.(3)
G = tensor.nnet.sigmoid(tensor.dot(X_word, tparams[p_name(prefix, 'v')]) + tparams[p_name(prefix, 'b')][0])
X = ifelse(tensor.le(pretrain_mode, numpy.float32(1.)),
ifelse(tensor.eq(pretrain_mode, numpy.float32(0.)), X_word, X_char),
G[:, :, None] * X_char + (1. - G)[:, :, None] * X_word)
return eval(activ)(X)
def concat_layer(tparams, X_word, X_char, options, prefix, pretrain_mode, activ='lambda x: x', **kwargs):
"""
compute the forward pass for a concat layer
Parameters
----------
tparams : OrderedDict of theano shared variables, {parameter name: value}
X_word : theano 3d tensor, word input, dimensions: (num of time steps, batch size, dim of vector)
X_char : theano 3d tensor, char input, dimensions: (num of time steps, batch size, dim of vector)
options : dictionary, {hyperparameter: value}
prefix : string, layer name
pretrain_mode : theano shared scalar, 0. = word only, 1. = char only, 2. = word & char
activ : string, activation function: 'liner', 'tanh', or 'rectifier'
Returns
-------
X : theano 3d tensor, final vector, dimensions: (num of time steps, batch size, dim of vector)
"""
X = ifelse(tensor.le(pretrain_mode, numpy.float32(1.)),
ifelse(tensor.eq(pretrain_mode, numpy.float32(0.)), X_word, X_char),
tensor.dot(tensor.concatenate([X_word, X_char], axis=2), tparams[p_name(prefix, 'W')]) + tparams[p_name(prefix, 'b')])
return eval(activ)(X)
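`gate_layer` and `concat_layer` additionally depend on Theano's lazy conditional and on a `p_name` parameter-naming helper, neither of which appears in these snippets; the import is standard Theano, while the `p_name` definition below is an assumption about the naming convention used.

import numpy
from theano.ifelse import ifelse

def p_name(prefix, name):
    # assumed naming convention, e.g. p_name('gate', 'W') -> 'gate_W'
    return '%s_%s' % (prefix, name)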
def __init__(self, n_in, n_out, activation=tanh,
             clip_gradients=False, init_zero=False):
    self.n_in = n_in
    self.n_out = n_out
    self.activation = activation
    self.clip_gradients = clip_gradients

    # self.in_gate = RecurrentLayer(n_in, n_out, sigmoid, clip_gradients, init_zero)
    # self.forget_gate = RecurrentLayer(n_in, n_out, sigmoid, clip_gradients, init_zero)
    # self.out_gate = RecurrentLayer(n_in, n_out, sigmoid, clip_gradients, init_zero)

    self.in_gate = RecurrentLayer(n_in + n_out, n_out, sigmoid, clip_gradients, init_zero)
    self.out_gate = RecurrentLayer(n_in + n_out, n_out, sigmoid, clip_gradients, init_zero)
    self.input_layer = RecurrentLayer(n_in, n_out, activation, clip_gradients, init_zero)

    self.internal_layers = [self.input_layer, self.in_gate,
                            self.out_gate]  # , self.forget_gate]
def __init__(self, n_in, n_out, activation=tanh,
             order=1, clip_gradients=False, BN=False):
    self.n_in = n_in
    self.n_out = n_out
    self.activation = activation
    self.order = order
    self.clip_gradients = clip_gradients

    # batch, in, row, col
    self.input_shape = (None, n_in, 1, None)
    # out, in, row, col
    self.filter_shape = (n_out, n_in, 1, order)
    self.W = create_shared(random_init(self.filter_shape), name="W")
    if not BN:
        self.bias = create_shared(random_init((n_out,)), name="bias")

    self.BNLayer = None
    self.BN = BN
    if BN:
        # calculate appropriate input_shape: (mini_batch_size, # of channels, # rows, # columns)
        new_shape = list(self.input_shape)
        new_shape[1] = self.filter_shape[0]
        new_shape = tuple(new_shape)
        self.BNLayer = BatchNormalization(new_shape, mode=1)
def gru_layer(tparams, emb, layerIndex, hiddenDimSize, mask=None):
    timesteps = emb.shape[0]
    if emb.ndim == 3:
        n_samples = emb.shape[1]
    else:
        n_samples = 1

    W_rx = T.dot(emb, tparams['W_r_'+layerIndex])
    W_zx = T.dot(emb, tparams['W_z_'+layerIndex])
    Wx = T.dot(emb, tparams['W_'+layerIndex])

    def stepFn(stepMask, wrx, wzx, wx, h):
        r = T.nnet.sigmoid(wrx + T.dot(h, tparams['U_r_'+layerIndex]) + tparams['b_r_'+layerIndex])
        z = T.nnet.sigmoid(wzx + T.dot(h, tparams['U_z_'+layerIndex]) + tparams['b_z_'+layerIndex])
        h_tilde = T.tanh(wx + T.dot(r*h, tparams['U_'+layerIndex]) + tparams['b_'+layerIndex])
        h_new = z * h + ((1. - z) * h_tilde)
        h_new = stepMask[:, None] * h_new + (1. - stepMask)[:, None] * h
        return h_new  # , output, time

    results, updates = theano.scan(
        fn=stepFn,
        sequences=[mask, W_rx, W_zx, Wx],
        outputs_info=T.alloc(numpy_floatX(0.0), n_samples, hiddenDimSize),
        name='gru_layer'+layerIndex,
        n_steps=timesteps)
    return results
def gru_layer(tparams, emb, layerIndex, hiddenDimSize, mask=None):
    timesteps = emb.shape[0]
    if emb.ndim == 3:
        n_samples = emb.shape[1]
    else:
        n_samples = 1

    W_rx = T.dot(emb, tparams['W_r_'+layerIndex])
    W_zx = T.dot(emb, tparams['W_z_'+layerIndex])
    Wx = T.dot(emb, tparams['W_'+layerIndex])

    def stepFn(stepMask, wrx, wzx, wx, h):
        r = T.nnet.sigmoid(wrx + T.dot(h, tparams['U_r_'+layerIndex]) + tparams['b_r_'+layerIndex])
        z = T.nnet.sigmoid(wzx + T.dot(h, tparams['U_z_'+layerIndex]) + tparams['b_z_'+layerIndex])
        h_tilde = T.tanh(wx + T.dot(r*h, tparams['U_'+layerIndex]) + tparams['b_'+layerIndex])
        h_new = z * h + ((1. - z) * h_tilde)
        h_new = stepMask[:, None] * h_new + (1. - stepMask)[:, None] * h
        return h_new

    results, updates = theano.scan(
        fn=stepFn,
        sequences=[mask, W_rx, W_zx, Wx],
        outputs_info=T.alloc(numpy_floatX(0.0), n_samples, hiddenDimSize),
        name='gru_layer'+layerIndex,
        n_steps=timesteps)
    return results
def recurrent_as_activation_function(self, Wix, Uix, h_tm1, c_tm1, y_tm1):
""" Implement the recurrent unit as an activation function. This function is called by self.__init__().
:param Wix: it equals to W^{hx}x_{t}, as it does not relate with recurrent, pre-calculate the value for fast computation
:type Wix: matrix
:param h_tm1: contains the hidden activation from previous time step
:type h_tm1: matrix, each row means a hidden activation vector of a time step
:param c_tm1: this parameter is not used, just to keep the interface consistent with LSTM
:returns: h_t is the hidden activation of current time step
"""
h_t = T.tanh(Wix + T.dot(h_tm1, self.W_hi) + T.dot(y_tm1, self.W_yi) + self.b_i) #
# simple recurrent decoder
#y_t = T.dot(h_t, self.U_hi) + self.b
# recurrent output and additional input
y_t = Uix + T.dot(h_t, self.U_hi) + T.dot(y_tm1, self.U_yi) + self.b
c_t = h_t
return h_t, c_t, y_t
def recurrent_as_activation_function(self, Wix, Wiy, h_tm1, c_tm1):
""" Implement the recurrent unit as an activation function. This function is called by self.__init__().
:param Wix: it equals to W^{hx}x_{t}, as it does not relate with recurrent, pre-calculate the value for fast computation
:type Wix: matrix
:param h_tm1: contains the hidden activation from previous time step
:type h_tm1: matrix, each row means a hidden activation vector of a time step
:param c_tm1: this parameter is not used, just to keep the interface consistent with LSTM
:returns: h_t is the hidden activation of current time step
"""
h_t = T.tanh(Wix + T.dot(h_tm1, self.W_hi) + Wiy + self.b_i) #
c_t = h_t
return h_t, c_t
def lstm_as_activation_function(self, Wix, Wfx, Wcx, Wox, h_tm1, c_tm1, y_tm1):
""" This function treats the LSTM block as an activation function, and implements the standard LSTM activation function.
The meaning of each input and output parameters can be found in :func:`layers.gating.LstmBase.recurrent_fn`
"""
i_t = T.nnet.sigmoid(Wix + T.dot(h_tm1, self.W_hi) + self.w_ci * c_tm1 + self.b_i) #
f_t = T.nnet.sigmoid(Wfx + T.dot(h_tm1, self.W_hf) + self.w_cf * c_tm1 + self.b_f) #
c_t = f_t * c_tm1 + i_t * T.tanh(Wcx + T.dot(h_tm1, self.W_hc) + T.dot(y_tm1, self.W_yi) + self.b_c)
o_t = T.nnet.sigmoid(Wox + T.dot(h_tm1, self.W_ho) + self.w_co * c_t + self.b_o)
h_t = o_t * T.tanh(c_t)
y_t = T.dot(h_t, self.U_ho) + self.b
return h_t, c_t, y_t #, i_t, f_t, o_t
def lstm_as_activation_function(self, Wix, Wfx, Wcx, Wox, h_tm1, c_tm1):
""" This function treats the LSTM block as an activation function, and implements the standard LSTM activation function.
The meaning of each input and output parameters can be found in :func:`layers.gating.LstmBase.recurrent_fn`
"""
i_t = T.nnet.sigmoid(Wix + T.dot(h_tm1, self.W_hi) + self.w_ci * c_tm1 + self.b_i) #
f_t = T.nnet.sigmoid(Wfx + T.dot(h_tm1, self.W_hf) + self.w_cf * c_tm1 + self.b_f) #
c_t = f_t * c_tm1 + i_t * T.tanh(Wcx + T.dot(h_tm1, self.W_hc) + self.b_c)
o_t = T.nnet.sigmoid(Wox + T.dot(h_tm1, self.W_ho) + self.w_co * c_t + self.b_o)
h_t = o_t * T.tanh(c_t)
return h_t, c_t#, i_t, f_t, o_t
def apply_activation(self, lin_output, activation):
    if activation == 'SIGMOID':
        final_output = T.nnet.sigmoid(lin_output)
    elif activation == 'TANH':
        final_output = T.tanh(lin_output)
    elif activation == 'LINEAR':
        final_output = lin_output
    elif activation == 'ReLU':    # rectified linear unit
        final_output = T.maximum(0.0, lin_output)
    elif activation == 'ReSU':    # rectified smooth unit (softplus); use symbolic ops, not numpy, on a Theano tensor
        final_output = T.log(1.0 + T.exp(lin_output))
    else:
        self.logger.critical('the activation function %s is not supported right now; please modify layers.py to support it' % (activation))
        raise ValueError('unsupported activation function: %s' % activation)
    return final_output
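The 'ReSU' branch above is just the softplus function; a quick self-contained check of the symbolic form against Theano's built-in (an illustration, not code from the source):

import numpy as np
import theano
import theano.tensor as T

z = T.vector('z')
f = theano.function([z], [T.log(1.0 + T.exp(z)), T.nnet.softplus(z)])
a, b = f(np.asarray([-2.0, 0.0, 2.0], dtype=theano.config.floatX))
assert np.allclose(a, b)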
def lstm_as_activation_function(self, Wix, Wfx, Wcx, Wox, h_tm1, c_tm1):
""" This function treats the LSTM block as an activation function, and implements the standard LSTM activation function.
The meaning of each input and output parameters can be found in :func:`layers.gating.LstmBase.recurrent_fn`
"""
i_t = T.nnet.sigmoid(Wix + T.dot(h_tm1, self.W_hi) + self.w_ci * c_tm1 + self.b_i) #
f_t = T.nnet.sigmoid(Wfx + T.dot(h_tm1, self.W_hf) + self.w_cf * c_tm1 + self.b_f) #
c_t = f_t * c_tm1 + i_t * T.tanh(Wcx + T.dot(h_tm1, self.W_hc) + self.b_c)
o_t = T.nnet.sigmoid(Wox + T.dot(h_tm1, self.W_ho) + self.w_co * c_t + self.b_o)
h_t = o_t * T.tanh(c_t)
return h_t, c_t#, i_t, f_t, o_t
def recurrent_as_activation_function(self, Wix, h_tm1, c_tm1):
""" Implement the recurrent unit as an activation function. This function is called by self.__init__().
:param Wix: it equals to W^{hx}x_{t}, as it does not relate with recurrent, pre-calculate the value for fast computation
:type Wix: matrix
:param h_tm1: contains the hidden activation from previous time step
:type h_tm1: matrix, each row means a hidden activation vector of a time step
:param c_tm1: this parameter is not used, just to keep the interface consistent with LSTM
:returns: h_t is the hidden activation of current time step
"""
h_t = T.tanh(Wix + T.dot(h_tm1, self.W_hi) + self.b_i) #
c_t = h_t
return h_t, c_t
def lstm_as_activation_function(self, Wix, Wfx, Wcx, Wox, h_tm1, c_tm1):
""" This function treats the LSTM block as an activation function, and implements the standard LSTM activation function.
The meaning of each input and output parameters can be found in :func:`layers.gating.LstmBase.recurrent_fn`
"""
i_t = T.nnet.sigmoid(Wix + T.dot(h_tm1, self.W_hi) + self.w_ci * c_tm1 + self.b_i) #
f_t = T.nnet.sigmoid(Wfx + T.dot(h_tm1, self.W_hf) + self.w_cf * c_tm1 + self.b_f) #
c_t = f_t * c_tm1 + i_t * T.tanh(Wcx + T.dot(h_tm1, self.W_hc) + self.b_c)
o_t = T.nnet.sigmoid(Wox + T.dot(h_tm1, self.W_ho) + self.w_co * c_t + self.b_o)
h_t = o_t * T.tanh(c_t)
return h_t, c_t#, i_t, f_t, o_t
def lstm_as_activation_function(self, Wix, Wfx, Wcx, Wox, h_tm1, c_tm1, y_tm1):
""" This function treats the LSTM block as an activation function, and implements the standard LSTM activation function.
The meaning of each input and output parameters can be found in :func:`layers.gating.LstmBase.recurrent_fn`
"""
i_t = T.nnet.sigmoid(Wix + T.dot(h_tm1, self.W_hi) + self.w_ci * c_tm1 + self.b_i) #
f_t = T.nnet.sigmoid(Wfx + T.dot(h_tm1, self.W_hf) + self.w_cf * c_tm1 + self.b_f) #
c_t = f_t * c_tm1 + i_t * T.tanh(Wcx + T.dot(h_tm1, self.W_hc) + T.dot(y_tm1, self.W_yi) + self.b_c)
o_t = T.nnet.sigmoid(Wox + T.dot(h_tm1, self.W_ho) + self.w_co * c_t + self.b_o)
h_t = o_t * T.tanh(c_t)
y_t = T.dot(h_t, self.U_ho) + self.b
return h_t, c_t, y_t #, i_t, f_t, o_t