def test_sigmoid():
'''
Test using a numerically stable reference sigmoid implementation
'''
def ref_sigmoid(x):
if x >= 0:
return 1 / (1 + np.exp(-x))
else:
z = np.exp(x)
return z / (1 + z)
sigmoid = np.vectorize(ref_sigmoid)
x = K.placeholder(ndim=2)
f = K.function([x], [activations.sigmoid(x)])
test_values = get_standard_values()
result = f([test_values])[0]
expected = sigmoid(test_values)
assert_allclose(result, expected, rtol=1e-05)
def test_hard_sigmoid():
'''
Test using a reference hard sigmoid implementation
'''
def ref_hard_sigmoid(x):
'''
Reference hard sigmoid with slope and shift values from theano, see
https://github.com/Theano/Theano/blob/master/theano/tensor/nnet/sigm.py
'''
x = (x * 0.2) + 0.5
z = 0.0 if x <= 0 else (1.0 if x >= 1 else x)
return z
hard_sigmoid = np.vectorize(ref_hard_sigmoid)
x = K.placeholder(ndim=2)
f = K.function([x], [activations.hard_sigmoid(x)])
test_values = get_standard_values()
result = f([test_values])[0]
expected = hard_sigmoid(test_values)
assert_allclose(result, expected, rtol=1e-05)
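# The two tests above rely on the usual module-level setup of a Keras activation
# test file: numpy, the Keras backend, and a shared get_standard_values() fixture.
# A minimal sketch of that setup follows; the concrete values returned by
# get_standard_values() are an assumption for illustration, not taken from the
# snippets above.
import numpy as np
from numpy.testing import assert_allclose

from keras import backend as K
from keras import activations


def get_standard_values():
    '''A small, fixed batch of inputs shared by the activation tests.'''
    return np.array([[0, 0.1, 0.5, 0.9, 1.0]], dtype=K.floatx())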
def __init__(self, units,
n_slots=50,
m_depth=20,
shift_range=3,
controller_model=None,
read_heads=1,
write_heads=1,
activation='sigmoid',
batch_size=777,
stateful=False,
**kwargs):
self.output_dim = units
self.units = units
self.n_slots = n_slots
self.m_depth = m_depth
self.shift_range = shift_range
self.controller = controller_model
self.activation = get_activations(activation)
self.read_heads = read_heads
self.write_heads = write_heads
self.batch_size = batch_size
# self.return_sequence = True
    try:
        # a stateful controller model changes how the recurrent state is handled
        self.controller_with_state = bool(controller_model.stateful)
    except AttributeError:
        self.controller_with_state = False
self.controller_read_head_emitting_dim = _controller_read_head_emitting_dim(m_depth, shift_range)
self.controller_write_head_emitting_dim = _controller_write_head_emitting_dim(m_depth, shift_range)
super(NeuralTuringMachine, self).__init__(**kwargs)
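# _controller_read_head_emitting_dim and _controller_write_head_emitting_dim are
# module-level helpers defined elsewhere in the project. Below is a minimal sketch
# that is consistent with the per-head split sizes used in
# _split_and_apply_activations further down ([m_depth, 1, 1, 3, 1] for addressing,
# with shift_range = 3, plus an erase and an add vector of length m_depth for
# write heads); this is an assumption based on those splits, not the original
# definitions.
def _controller_read_head_emitting_dim(m_depth, shift_range):
    # k (m_depth) + beta (1) + g (1) + shift (shift_range) + gamma (1)
    return m_depth + 1 + 1 + shift_range + 1


def _controller_write_head_emitting_dim(m_depth, shift_range):
    # read-head addressing data plus erase_vector (m_depth) and add_vector (m_depth)
    return _controller_read_head_emitting_dim(m_depth, shift_range) + 2 * m_depth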
def call(self, x):
    # transform gate T(x): decides how much of the transformed input passes through
    y = K.dot(x, self.W_carry)
    if self.bias:
        y += self.b_carry
    transform_weight = activations.sigmoid(y)
    # candidate activation H(x)
    y = K.dot(x, self.W)
    if self.bias:
        y += self.b
    act = self.activation(y)
    # highway combination: T(x) * H(x) + (1 - T(x)) * x
    act *= transform_weight
    output = act + (1 - transform_weight) * x
    return output
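# The call above is a highway combination: a transform gate
# T(x) = sigmoid(x . W_carry + b_carry) blends the activated projection H(x) with
# the untouched input, so the layer can learn to pass x through unchanged.
# A minimal NumPy sketch with hypothetical weights:
import numpy as np

def np_sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

rng = np.random.RandomState(0)
x = rng.randn(4, 8)                       # (batch, features); highway layers keep dims equal
W = rng.randn(8, 8) * 0.05
W_carry = rng.randn(8, 8) * 0.05
b = np.zeros(8)
b_carry = np.full(8, -2.0)                # negative carry bias -> gate starts mostly closed

gate = np_sigmoid(x @ W_carry + b_carry)  # transform gate T(x)
h = np.tanh(x @ W + b)                    # H(x); tanh stands in for self.activation
output = gate * h + (1.0 - gate) * x      # gate ~ 0 means the layer passes x through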
def step(self, x, states):
ytm, stm = states
# repeat the hidden state to the length of the sequence
_stm = K.repeat(stm, self.timesteps)
    # now multiply the repeated hidden state by the attention weight matrix
_Wxstm = K.dot(_stm, self.W_a)
    # calculate the attention probabilities
    # (how much each input timestep contributes to this output step)
et = K.dot(activations.tanh(_Wxstm + self._uxpb),
K.expand_dims(self.V_a))
at = K.exp(et)
at_sum = K.sum(at, axis=1)
at_sum_repeated = K.repeat(at_sum, self.timesteps)
    at /= at_sum_repeated  # attention weights, shape (batch_size, timesteps, 1)
# calculate the context vector
context = K.squeeze(K.batch_dot(at, self.x_seq, axes=1), axis=1)
# ~~~> calculate new hidden state
# first calculate the "r" gate:
rt = activations.sigmoid(
K.dot(ytm, self.W_r)
+ K.dot(stm, self.U_r)
+ K.dot(context, self.C_r)
+ self.b_r)
# now calculate the "z" gate
zt = activations.sigmoid(
K.dot(ytm, self.W_z)
+ K.dot(stm, self.U_z)
+ K.dot(context, self.C_z)
+ self.b_z)
# calculate the proposal hidden state:
s_tp = activations.tanh(
K.dot(ytm, self.W_p)
+ K.dot((rt * stm), self.U_p)
+ K.dot(context, self.C_p)
+ self.b_p)
# new hidden state:
st = (1-zt)*stm + zt * s_tp
yt = activations.softmax(
K.dot(ytm, self.W_o)
+ K.dot(stm, self.U_o)
+ K.dot(context, self.C_o)
+ self.b_o)
if self.return_probabilities:
return at, [yt, st]
else:
return yt, [yt, st]
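# The attention weights computed in step() (exp followed by division by the
# per-sample sum over timesteps) are an explicit softmax along the timestep axis.
# A small NumPy sketch of the same normalisation; subtracting the per-row maximum
# first, as done here, is the usual numerically stable form, which the code above
# omits.
import numpy as np

et = np.random.RandomState(1).randn(2, 5, 1)      # (batch, timesteps, 1) attention scores
at = np.exp(et - et.max(axis=1, keepdims=True))   # stable exponentiation
at /= at.sum(axis=1, keepdims=True)               # weights sum to 1 over timesteps
assert np.allclose(at.sum(axis=1), 1.0)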
def _split_and_apply_activations(self, controller_output):
""" This takes the controller output, splits it in ntm_output, read and wright adressing data.
It returns a triple of ntm_output, controller_instructions_read, controller_instructions_write.
ntm_output is a tensor, controller_instructions_read and controller_instructions_write are lists containing
the adressing instruction (k, beta, g, shift, gamma) and in case of write also the writing constructions,
consisting of an erase and an add vector.
As it is necesseary for stable results,
k and add_vector is activated via tanh, erase_vector via sigmoid (this is critical!),
shift via softmax,
gamma is sigmoided, inversed and clipped (probably not ideal)
g is sigmoided,
beta is linear (probably not ideal!) """
# splitting
ntm_output, controller_instructions_read, controller_instructions_write = tf.split(
controller_output,
np.asarray([self.output_dim,
self.read_heads * self.controller_read_head_emitting_dim,
self.write_heads * self.controller_write_head_emitting_dim]),
axis=1)
controller_instructions_read = tf.split(controller_instructions_read, self.read_heads, axis=1)
controller_instructions_write = tf.split(controller_instructions_write, self.write_heads, axis=1)
controller_instructions_read = [
tf.split(single_head_data, np.asarray([self.m_depth, 1, 1, 3, 1]), axis=1) for
single_head_data in controller_instructions_read]
controller_instructions_write = [
tf.split(single_head_data, np.asarray([self.m_depth, 1, 1, 3, 1, self.m_depth, self.m_depth]), axis=1) for
single_head_data in controller_instructions_write]
    # activation
ntm_output = self.activation(ntm_output)
controller_instructions_read = [(tanh(k), hard_sigmoid(beta)+0.5, sigmoid(g), softmax(shift), 1 + 9*sigmoid(gamma)) for
(k, beta, g, shift, gamma) in controller_instructions_read]
controller_instructions_write = [
(tanh(k), hard_sigmoid(beta)+0.5, sigmoid(g), softmax(shift), 1 + 9*sigmoid(gamma), hard_sigmoid(erase_vector), tanh(add_vector)) for
(k, beta, g, shift, gamma, erase_vector, add_vector) in controller_instructions_write]
return (ntm_output, controller_instructions_read, controller_instructions_write)
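# Illustration of the ranges produced by the activations above, using plain NumPy
# stand-ins for Keras' sigmoid and hard_sigmoid: gamma is mapped into roughly
# (1, 10) and beta into [0.5, 1.5].
import numpy as np

def np_sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

def np_hard_sigmoid(x):
    return np.clip(x * 0.2 + 0.5, 0.0, 1.0)

raw = np.array([-10.0, 0.0, 10.0])
print(1 + 9 * np_sigmoid(raw))       # approx. [1.0, 5.5, 10.0]
print(np_hard_sigmoid(raw) + 0.5)    # [0.5, 1.0, 1.5]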
def LSTMCNN(opt):
# opt.seq_length = number of time steps (words) in each batch
# opt.rnn_size = dimensionality of hidden layers
# opt.num_layers = number of layers
# opt.dropout = dropout probability
# opt.word_vocab_size = num words in the vocab
# opt.word_vec_size = dimensionality of word embeddings
# opt.char_vocab_size = num chars in the character vocab
# opt.char_vec_size = dimensionality of char embeddings
# opt.feature_maps = table of feature map sizes for each kernel width
# opt.kernels = table of kernel widths
    # opt.max_word_l = max length of a word (in characters)
    # opt.use_words = 1 to use word embeddings, 0 otherwise
    # opt.use_chars = 1 to use char embeddings, 0 otherwise
    # opt.highway_layers = number of highway layers to use, if any
    # opt.batch_norm = 1 to apply batch normalization to the embeddings
    # opt.batch_size = number of sequences in each batch
    # opt.learning_rate, opt.max_grad_norm = SGD learning rate and gradient clipping norm
if opt.use_words:
word = Input(batch_shape=(opt.batch_size, opt.seq_length), dtype='int32', name='word')
word_vecs = Embedding(opt.word_vocab_size, opt.word_vec_size, input_length=opt.seq_length)(word)
if opt.use_chars:
chars = Input(batch_shape=(opt.batch_size, opt.seq_length, opt.max_word_l), dtype='int32', name='chars')
chars_embedding = TimeDistributed(Embedding(opt.char_vocab_size, opt.char_vec_size, name='chars_embedding'))(chars)
cnn = CNN(opt.seq_length, opt.max_word_l, opt.char_vec_size, opt.feature_maps, opt.kernels, chars_embedding)
if opt.use_words:
x = Concatenate()([cnn, word_vecs])
inputs = [chars, word]
else:
x = cnn
inputs = chars
else:
x = word_vecs
inputs = word
if opt.batch_norm:
x = BatchNormalization()(x)
for l in range(opt.highway_layers):
x = TimeDistributed(Highway(activation='relu'))(x)
for l in range(opt.num_layers):
x = LSTM(opt.rnn_size, activation='tanh', recurrent_activation='sigmoid', return_sequences=True, stateful=True)(x)
if opt.dropout > 0:
x = Dropout(opt.dropout)(x)
output = TimeDistributed(Dense(opt.word_vocab_size, activation='softmax'))(x)
model = sModel(inputs=inputs, outputs=output)
model.summary()
optimizer = sSGD(lr=opt.learning_rate, clipnorm=opt.max_grad_norm, scale=float(opt.seq_length))
model.compile(loss='sparse_categorical_crossentropy', optimizer=optimizer)
return model
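# Hypothetical example of assembling the `opt` object consumed by LSTMCNN; the
# field names mirror the comment block at the top of the function, but every
# value is illustrative, and CNN, sModel, sSGD and Highway are project-specific
# pieces that must be importable for the call to succeed.
from types import SimpleNamespace

opt = SimpleNamespace(
    seq_length=35, rnn_size=650, num_layers=2, dropout=0.5,
    word_vocab_size=10000, word_vec_size=650,
    char_vocab_size=60, char_vec_size=15,
    feature_maps=[50, 100, 150], kernels=[2, 3, 4],
    max_word_l=21, use_words=0, use_chars=1,
    highway_layers=2, batch_norm=0, batch_size=20,
    learning_rate=1.0, max_grad_norm=5.0,
)
model = LSTMCNN(opt)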