def test_relu():
'''
Relu implementation doesn't depend on the value being
a theano variable. Testing ints, floats and theano tensors.
'''
from keras.activations import relu as r
assert r(5) == 5
assert r(-5) == 0
assert r(-0.1) == 0
assert r(0.1) == 0.1
x = T.vector()
exp = r(x)
f = theano.function([x], exp)
test_values = get_standard_values()
result = f(test_values)
list_assert_equal(result, test_values) # because no negatives in test values
python类tanh()的实例源码
def test_tanh():
from keras.activations import tanh as t
test_values = get_standard_values()
x = T.vector()
exp = t(x)
f = theano.function([x], exp)
result = f(test_values)
expected = [math.tanh(v) for v in test_values]
print(result)
print(expected)
list_assert_equal(result, expected)
def test_relu():
'''
Relu implementation doesn't depend on the value being
a theano variable. Testing ints, floats and theano tensors.
'''
from keras.activations import relu as r
assert r(5) == 5
assert r(-5) == 0
assert r(-0.1) == 0
assert r(0.1) == 0.1
x = T.vector()
exp = r(x)
f = theano.function([x], exp)
test_values = get_standard_values()
result = f(test_values)
list_assert_equal(result, test_values) # because no negatives in test values
def step(self, x_input, states):
#print "x_input:", x_input, x_input.shape
# <TensorType(float32, matrix)>
input_shape = self.input_spec[0].shape
en_seq = states[-1]
_, [h, c] = super(PointerLSTM, self).step(x_input, states[:-1])
# vt*tanh(W1*e+W2*d)
dec_seq = K.repeat(h, input_shape[1])
Eij = time_distributed_dense(en_seq, self.W1, output_dim=1)
Dij = time_distributed_dense(dec_seq, self.W2, output_dim=1)
U = self.vt * tanh(Eij + Dij)
U = K.squeeze(U, 2)
# make probability tensor
pointer = softmax(U)
return pointer, [h, c]
def test_relu():
'''
Relu implementation doesn't depend on the value being
a theano variable. Testing ints, floats and theano tensors.
'''
from keras.activations import relu as r
assert r(5) == 5
assert r(-5) == 0
assert r(-0.1) == 0
assert r(0.1) == 0.1
x = T.vector()
exp = r(x)
f = theano.function([x], exp)
test_values = get_standard_values()
result = f(test_values)
list_assert_equal(result, test_values) # because no negatives in test values
def test_tanh():
from keras.activations import tanh as t
test_values = get_standard_values()
x = T.vector()
exp = t(x)
f = theano.function([x], exp)
result = f(test_values)
expected = [math.tanh(v) for v in test_values]
print(result)
print(expected)
list_assert_equal(result, expected)
def test_tanh():
test_values = get_standard_values()
x = K.placeholder(ndim=2)
exp = activations.tanh(x)
f = K.function([x], [exp])
result = f([test_values])[0]
expected = np.tanh(test_values)
assert_allclose(result, expected, rtol=1e-05)
def get_initial_state(self, inputs):
print('inputs shape:', inputs.get_shape())
# apply the matrix on the first time step to get the initial s0.
s0 = activations.tanh(K.dot(inputs[:, 0], self.W_s))
# from keras.layers.recurrent to initialize a vector of (batchsize,
# output_dim)
y0 = K.zeros_like(inputs) # (samples, timesteps, input_dims)
y0 = K.sum(y0, axis=(1, 2)) # (samples, )
y0 = K.expand_dims(y0) # (samples, 1)
y0 = K.tile(y0, [1, self.output_dim])
return [y0, s0]
def test_tanh():
test_values = get_standard_values()
x = K.placeholder(ndim=2)
exp = activations.tanh(x)
f = K.function([x], [exp])
result = f([test_values])[0]
expected = np.tanh(test_values)
assert_allclose(result, expected, rtol=1e-05)
def test_tanh():
test_values = get_standard_values()
x = K.placeholder(ndim=2)
exp = activations.tanh(x)
f = K.function([x], [exp])
result = f([test_values])[0]
expected = np.tanh(test_values)
assert_allclose(result, expected, rtol=1e-05)
def test_tanh():
from keras.activations import tanh as t
test_values = get_standard_values()
x = T.vector()
exp = t(x)
f = theano.function([x], exp)
result = f(test_values)
expected = [math.tanh(v) for v in test_values]
print(result)
print(expected)
list_assert_equal(result, expected)
def __init__(self, units, output_dim,
activation='tanh',
return_probabilities=False,
name='AttentionDecoder',
kernel_initializer='glorot_uniform',
recurrent_initializer='orthogonal',
bias_initializer='zeros',
kernel_regularizer=None,
bias_regularizer=None,
activity_regularizer=None,
kernel_constraint=None,
bias_constraint=None,
**kwargs):
"""
Implements an AttentionDecoder that takes in a sequence encoded by an
encoder and outputs the decoded states
:param units: dimension of the hidden state and the attention matrices
:param output_dim: the number of labels in the output space
references:
Bahdanau, Dzmitry, Kyunghyun Cho, and Yoshua Bengio.
"Neural machine translation by jointly learning to align and translate."
arXiv preprint arXiv:1409.0473 (2014).
"""
self.units = units
self.output_dim = output_dim
self.return_probabilities = return_probabilities
self.activation = activations.get(activation)
self.kernel_initializer = initializers.get(kernel_initializer)
self.recurrent_initializer = initializers.get(recurrent_initializer)
self.bias_initializer = initializers.get(bias_initializer)
self.kernel_regularizer = regularizers.get(kernel_regularizer)
self.recurrent_regularizer = regularizers.get(kernel_regularizer)
self.bias_regularizer = regularizers.get(bias_regularizer)
self.activity_regularizer = regularizers.get(activity_regularizer)
self.kernel_constraint = constraints.get(kernel_constraint)
self.recurrent_constraint = constraints.get(kernel_constraint)
self.bias_constraint = constraints.get(bias_constraint)
super(AttentionDecoder, self).__init__(**kwargs)
self.name = name
self.return_sequences = True # must return sequences
def step(self, x, states):
ytm, stm = states
# repeat the hidden state to the length of the sequence
_stm = K.repeat(stm, self.timesteps)
# now multiplty the weight matrix with the repeated hidden state
_Wxstm = K.dot(_stm, self.W_a)
# calculate the attention probabilities
# this relates how much other timesteps contributed to this one.
et = K.dot(activations.tanh(_Wxstm + self._uxpb),
K.expand_dims(self.V_a))
at = K.exp(et)
at_sum = K.sum(at, axis=1)
at_sum_repeated = K.repeat(at_sum, self.timesteps)
at /= at_sum_repeated # vector of size (batchsize, timesteps, 1)
# calculate the context vector
context = K.squeeze(K.batch_dot(at, self.x_seq, axes=1), axis=1)
# ~~~> calculate new hidden state
# first calculate the "r" gate:
rt = activations.sigmoid(
K.dot(ytm, self.W_r)
+ K.dot(stm, self.U_r)
+ K.dot(context, self.C_r)
+ self.b_r)
# now calculate the "z" gate
zt = activations.sigmoid(
K.dot(ytm, self.W_z)
+ K.dot(stm, self.U_z)
+ K.dot(context, self.C_z)
+ self.b_z)
# calculate the proposal hidden state:
s_tp = activations.tanh(
K.dot(ytm, self.W_p)
+ K.dot((rt * stm), self.U_p)
+ K.dot(context, self.C_p)
+ self.b_p)
# new hidden state:
st = (1-zt)*stm + zt * s_tp
yt = activations.softmax(
K.dot(ytm, self.W_o)
+ K.dot(stm, self.U_o)
+ K.dot(context, self.C_o)
+ self.b_o)
if self.return_probabilities:
return at, [yt, st]
else:
return yt, [yt, st]
def _split_and_apply_activations(self, controller_output):
""" This takes the controller output, splits it in ntm_output, read and wright adressing data.
It returns a triple of ntm_output, controller_instructions_read, controller_instructions_write.
ntm_output is a tensor, controller_instructions_read and controller_instructions_write are lists containing
the adressing instruction (k, beta, g, shift, gamma) and in case of write also the writing constructions,
consisting of an erase and an add vector.
As it is necesseary for stable results,
k and add_vector is activated via tanh, erase_vector via sigmoid (this is critical!),
shift via softmax,
gamma is sigmoided, inversed and clipped (probably not ideal)
g is sigmoided,
beta is linear (probably not ideal!) """
# splitting
ntm_output, controller_instructions_read, controller_instructions_write = tf.split(
controller_output,
np.asarray([self.output_dim,
self.read_heads * self.controller_read_head_emitting_dim,
self.write_heads * self.controller_write_head_emitting_dim]),
axis=1)
controller_instructions_read = tf.split(controller_instructions_read, self.read_heads, axis=1)
controller_instructions_write = tf.split(controller_instructions_write, self.write_heads, axis=1)
controller_instructions_read = [
tf.split(single_head_data, np.asarray([self.m_depth, 1, 1, 3, 1]), axis=1) for
single_head_data in controller_instructions_read]
controller_instructions_write = [
tf.split(single_head_data, np.asarray([self.m_depth, 1, 1, 3, 1, self.m_depth, self.m_depth]), axis=1) for
single_head_data in controller_instructions_write]
#activation
ntm_output = self.activation(ntm_output)
controller_instructions_read = [(tanh(k), hard_sigmoid(beta)+0.5, sigmoid(g), softmax(shift), 1 + 9*sigmoid(gamma)) for
(k, beta, g, shift, gamma) in controller_instructions_read]
controller_instructions_write = [
(tanh(k), hard_sigmoid(beta)+0.5, sigmoid(g), softmax(shift), 1 + 9*sigmoid(gamma), hard_sigmoid(erase_vector), tanh(add_vector)) for
(k, beta, g, shift, gamma, erase_vector, add_vector) in controller_instructions_write]
return (ntm_output, controller_instructions_read, controller_instructions_write)