def test_softmax():
'''
Test using a reference implementation of softmax
'''
def softmax(values):
m = np.max(values)
e = np.exp(values - m)
return e / np.sum(e)
x = K.placeholder(ndim=2)
f = K.function([x], [activations.softmax(x)])
test_values = get_standard_values()
result = f([test_values])[0]
expected = softmax(test_values)
assert_allclose(result, expected, rtol=1e-05)
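# Note: the reference softmax above subtracts the max before exponentiating; this is
# the standard numerical-stability trick and does not change the result. A minimal
# standalone check (plain NumPy, not part of the original test suite):
import numpy as np
v = np.array([1.0, 2.0, 3.0])
stable = np.exp(v - v.max()) / np.exp(v - v.max()).sum()
naive = np.exp(v) / np.exp(v).sum()
assert np.allclose(stable, naive)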
def __init__(self, coords=4, classes=20, num=1,
log=0, sqrt=0, softmax=0, background=0, max=30,
jitter=0.2,
rescore = 0, thresh=0.5, classfix=0, absolute=0, random=0,
coord_scale=1, object_scale=1,
noobject_scale=1, class_scale=1,
bias_match=0,
tree=None,  # tree_file for softmax_tree - not used now
map_filename=None, # file name for map_file - not used
anchors=None,
**kwargs
):
super(Region, self).__init__(**kwargs)
self.coords = coords
self.classes = classes
self.num = num
self.background = background
print(coords, classes)
self.c = (self.coords+self.classes+1)*num
if anchors:
self.biases = list(map(float, anchors))
pass
def _process_input(self, x):
"""Apply logistic and softmax activations to input tensor
"""
logistic_activate = lambda x: 1.0/(1.0 + K.exp(-x))
(batch, w, h, channels) = x.get_shape()
x_temp = K.permute_dimensions(x, (3, 0, 1, 2))
x_t = []
for i in range(self.num):
k = self._entry_index(i, 0)
x_t.extend([
logistic_activate(K.gather(x_temp, (k, k + 1))), # 0
K.gather(x_temp, (k + 2, k + 3))])
if self.background:
x_t.append(K.gather(x_temp, (k + 4,)))
else:
x_t.append(logistic_activate(K.gather(x_temp, (k + 4,))))
x_t.append(
softmax(
K.gather(x_temp, tuple(range(k + 5, k + self.coords + self.classes + 1))),
axis=0))
x_t = K.concatenate(x_t, axis=0)
return K.permute_dimensions(x_t, (1, 2, 3, 0))
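# _entry_index is not shown in this snippet. Given the channel layout implied above
# (per-anchor blocks of box offsets, box sizes, one objectness score and the class
# scores along the gathered axis), a plausible sketch would be the following
# (hypothetical helper, not taken from the original source):
def _entry_index(self, anchor, entry):
    # offset of `entry` inside the channel block belonging to anchor `anchor`
    return anchor * (self.coords + self.classes + 1) + entry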
def test_softmax():
from keras.activations import softmax as s
# Test using a reference implementation of softmax
def softmax(values):
m = max(values)
values = numpy.array(values)
e = numpy.exp(values - m)
dist = list(e / numpy.sum(e))
return dist
x = T.vector()
exp = s(x)
f = theano.function([x], exp)
test_values = get_standard_values()
result = f(test_values)
expected = softmax(test_values)
print(str(result))
print(str(expected))
list_assert_equal(result, expected)
def _get_weight_vector(self, M, w_tm1, k, beta, g, s, gamma):
# M = tf.Print(M, [M, w_tm1, k], message='get weights beg1: ')
# M = tf.Print(M, [beta, g, s, gamma], message='get weights beg2: ')
# Content addressing, see Chapter 3.3.1:
num = beta * _cosine_distance(M, k)
w_c = K.softmax(num) # It turns out that equation (5) is just softmax.
# Location addressing, see Chapter 3.3.2:
# Equation 7:
w_g = (g * w_c) + (1-g)*w_tm1
# C_s is the circular convolution
#C_w = K.sum((self.C[None, :, :, :] * w_g[:, None, None, :]),axis=3)
# Equation 8:
# TODO: Explain
C_s = K.sum(K.repeat_elements(self.C[None, :, :, :], self.batch_size, axis=0) * s[:,:,None,None], axis=1)
w_tilda = K.batch_dot(C_s, w_g)
# Equation 9:
w_out = _renorm(w_tilda ** gamma)
return w_out
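# For reference, a minimal NumPy sketch of the same addressing pipeline (content
# addressing, interpolation, circular shift, sharpening) for a single head and a
# single batch element. Names and shapes are assumptions, independent of the layer above.
import numpy as np

def ntm_address(M, w_prev, k, beta, g, s, gamma):
    # content addressing (eq. 5-6): scaled cosine similarity, then softmax
    sim = M @ k / (np.linalg.norm(M, axis=1) * np.linalg.norm(k) + 1e-8)
    e = np.exp(beta * sim - np.max(beta * sim))
    w_c = e / e.sum()
    # interpolation with the previous weights (eq. 7)
    w_g = g * w_c + (1 - g) * w_prev
    # circular convolution with the shift distribution s (eq. 8)
    n = len(w_g)
    w_tilde = np.array([sum(w_g[j] * s[(i - j) % n] for j in range(n)) for i in range(n)])
    # sharpening and renormalisation (eq. 9)
    w = w_tilde ** gamma
    return w / w.sum()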
def step(self, x_input, states):
#print "x_input:", x_input, x_input.shape
# <TensorType(float32, matrix)>
input_shape = self.input_spec[0].shape
en_seq = states[-1]
_, [h, c] = super(PointerLSTM, self).step(x_input, states[:-1])
# vt*tanh(W1*e+W2*d)
dec_seq = K.repeat(h, input_shape[1])
Eij = time_distributed_dense(en_seq, self.W1, output_dim=1)
Dij = time_distributed_dense(dec_seq, self.W2, output_dim=1)
U = self.vt * tanh(Eij + Dij)
U = K.squeeze(U, 2)
# make probability tensor
pointer = softmax(U)
return pointer, [h, c]
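# The pointer distribution above follows u_i = v^T tanh(W1 * e_i + W2 * d) with
# pointer = softmax(u). A tiny NumPy illustration (all shapes and values are made up):
import numpy as np
e = np.random.randn(5, 8)        # encoder states, one row per input position
d = np.random.randn(8)           # current decoder state
W1 = np.random.randn(8, 1)
W2 = np.random.randn(8, 1)
v = 1.0
u = (v * np.tanh(e @ W1 + d @ W2)).squeeze(-1)
pointer = np.exp(u - u.max()) / np.exp(u - u.max()).sum()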
def call(self, logits):
    u = K.random_uniform(K.shape(logits), 0, 1)
    gumbel = -K.log(-K.log(u + 1e-20) + 1e-20)
    return K.in_train_phase(
        K.softmax((logits + gumbel) / self.tau),
        K.softmax((logits + gumbel) / self.min))
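# Gumbel-Softmax sampling as used in call() above, sketched in plain NumPy. tau is the
# training-time temperature; at test time the layer above falls back to self.min.
import numpy as np

def gumbel_softmax_sample(logits, tau=1.0, eps=1e-20):
    u = np.random.uniform(0.0, 1.0, size=logits.shape)
    gumbel = -np.log(-np.log(u + eps) + eps)     # Gumbel(0, 1) noise
    y = (logits + gumbel) / tau
    y = y - y.max(axis=-1, keepdims=True)        # numerically stable softmax
    return np.exp(y) / np.exp(y).sum(axis=-1, keepdims=True)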
def loss(self):
logits = self.logits
q = K.softmax(logits)
log_q = K.log(q + 1e-20)
return - K.mean(q * (log_q - K.log(1.0/K.int_shape(logits)[-1])),
axis=tuple(range(1,len(K.int_shape(logits)))))
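# The loss above is, up to the sign and averaging instead of summing, the KL divergence
# between q = softmax(logits) and the uniform distribution over the last axis:
# KL(q || U) = sum_i q_i * (log q_i - log(1/n)). A quick NumPy check of that quantity:
import numpy as np
logits = np.array([0.5, 1.0, -0.3])
q = np.exp(logits - logits.max())
q /= q.sum()
kl = np.sum(q * (np.log(q) - np.log(1.0 / q.size)))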
def test_time_distributed_softmax():
x = K.placeholder(shape=(1, 1, 5))
f = K.function([x], [activations.softmax(x)])
test_values = get_standard_values()
test_values = np.reshape(test_values, (1, 1, np.size(test_values)))
f([test_values])[0]
def __init__(self, model):
"""
Keras classifier wrapper.
Note that the wrapped classifier should spit logits as output.
"""
layer_id = len(model.layers)-2
self.model = Model(inputs=model.layers[0].input, outputs=model.layers[layer_id].output)
self.softmax = Sequential()
self.softmax.add(Lambda(lambda X: softmax(X, axis=1), input_shape=(10,)))
def classify(self, X, option="logit", T=1):
if option == "logit":
return self.model.predict(X)
if option == "prob":
logits = self.model.predict(X)/T
return self.softmax.predict(logits)
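# Hedged usage sketch for the wrapper above (the wrapper's class name and the data are
# hypothetical; only the classify() options come from the code):
#     clf = WrappedClassifier(keras_model)             # hypothetical class name
#     logits = clf.classify(X, option="logit")
#     probs = clf.classify(X, option="prob", T=10)     # temperature-scaled softmax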
def build(self):
query = Input(name='query', shape=(self.config['vocab_size'],))#, sparse=True)
show_layer_info('Input', query)
doc = Input(name='doc', shape=(self.config['vocab_size'],))#, sparse=True)
show_layer_info('Input', doc)
def mlp_work(input_dim):
seq = Sequential()
#seq.add(SparseFullyConnectedLayer(self.config['hidden_sizes'][0], input_dim=input_dim, activation='relu'))
num_hidden_layers = len(self.config['hidden_sizes'])
if num_hidden_layers == 1:
seq.add(Dense(self.config['hidden_sizes'][0], input_shape=(input_dim,), activity_regularizer=regularizers.l2(self.config['reg_rate'])))
else:
seq.add(Dense(self.config['hidden_sizes'][0], activation='tanh', input_shape=(input_dim,), activity_regularizer=regularizers.l2(self.config['reg_rate'])))
for i in range(num_hidden_layers-2):
seq.add(Dense(self.config['hidden_sizes'][i+1], activation='tanh', activity_regularizer=regularizers.l2(self.config['reg_rate'])))
seq.add(Dropout(rate=self.config['dropout_rate']))
seq.add(Dense(self.config['hidden_sizes'][num_hidden_layers-1], activity_regularizer=regularizers.l2(self.config['reg_rate'])))
seq.add(Dropout(rate=self.config['dropout_rate']))
return seq
mlp = mlp_work(self.config['vocab_size'])
rq = mlp(query)
show_layer_info('MLP', rq)
rd = mlp(doc)
show_layer_info('MLP', rd)
'''
rep = Concatenate(axis=1) ([rq, rd])
show_layer_info('Concatenate', rep)
rep = Dropout(rate=self.config['dropout_rate'])(rep)
show_layer_info('Dropout', rep)
if self.config['target_mode'] == 'classification':
out_ = Dense(2, activation='softmax')(rep)
elif self.config['target_mode'] in ['regression', 'ranking']:
out_ = Dense(1)(rep)
show_layer_info('Dense', out_)
'''
out_ = Dot(axes=[1, 1], normalize=True)([rq, rd])
show_layer_info('Dot', out_)
if self.config['target_mode'] == 'classification':
out_ = Dense(2, activation='softmax')(out_)
show_layer_info('Dense', out_)
model = Model(inputs=[query, doc], outputs=[out_])
return model
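# Dot(axes=[1, 1], normalize=True) above computes the cosine similarity between the two
# MLP outputs rq and rd. Equivalent NumPy for a single pair of vectors:
import numpy as np

def cosine(a, b):
    return float(np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b)))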
def build(self):
def mlp_work(input_dim):
seq = Sequential()
num_hidden_layers = len(self.config['hidden_sizes'])
assert num_hidden_layers > 0
if num_hidden_layers == 1:
seq.add(Dense(self.config['hidden_sizes'][0], input_shape=(input_dim,)))
else:
seq.add(Dense(self.config['hidden_sizes'][0], activation='relu', input_shape=(input_dim,)))
for i in range(num_hidden_layers - 2):
seq.add(Dense(self.config['hidden_sizes'][i+1], activation='relu'))
seq.add(Dropout(self.config['dropout_rate']))
seq.add(Dense(self.config['hidden_sizes'][num_hidden_layers-1]))
seq.add(Dropout(self.config['dropout_rate']))
return seq
query = Input(name='query', shape=(self.config['text1_maxlen'],))
show_layer_info('Input', query)
doc = Input(name='doc', shape=(self.config['text2_maxlen'],))
show_layer_info('Input', doc)
wordhashing = Embedding(self.config['vocab_size'], self.config['embed_size'], weights=[self.config['embed']], trainable=self.embed_trainable)
q_embed = wordhashing(query)
show_layer_info('Embedding', q_embed)
d_embed = wordhashing(doc)
show_layer_info('Embedding', d_embed)
conv1d = Convolution1D(self.config['kernel_count'], self.config['kernel_size'], padding='same', activation='relu')
q_conv = conv1d(q_embed)
show_layer_info('Convolution1D', q_conv)
q_conv = Dropout(self.config['dropout_rate'])(q_conv)
show_layer_info('Dropout', q_conv)
d_conv = conv1d(d_embed)
show_layer_info('Convolution1D', d_conv)
d_conv = Dropout(self.config['dropout_rate'])(d_conv)
show_layer_info('Dropout', d_conv)
q_pool = MaxPooling1D(self.config['text1_maxlen'])(q_conv)
show_layer_info('MaxPooling1D', q_pool)
q_pool_re = Reshape((-1,))(q_pool)
show_layer_info('Reshape', q_pool_re)
d_pool = MaxPooling1D(self.config['text2_maxlen'])(d_conv)
show_layer_info('MaxPooling1D', d_pool)
d_pool_re = Reshape((-1,))(d_pool)
show_layer_info('Reshape', d_pool_re)
mlp = mlp_work(self.config['kernel_count'])
rq = mlp(q_pool_re)
show_layer_info('MLP', rq)
rd = mlp(d_pool_re)
show_layer_info('MLP', rd)
out_ = Dot(axes=[1, 1], normalize=True)([rq, rd])
show_layer_info('Dot', out_)
if self.config['target_mode'] == 'classification':
out_ = Dense(2, activation='softmax')(out_)
show_layer_info('Dense', out_)
model = Model(inputs=[query, doc], outputs=[out_])
return model
def build(self):
query = Input(name='query', shape=(self.config['text1_maxlen'],))
show_layer_info('Input', query)
doc = Input(name='doc', shape=(self.config['text2_maxlen'],))
show_layer_info('Input', doc)
embedding = Embedding(self.config['vocab_size'], self.config['embed_size'], weights=[self.config['embed']], trainable=self.embed_trainable)
q_embed = embedding(query)
show_layer_info('Embedding', q_embed)
d_embed = embedding(doc)
show_layer_info('Embedding', d_embed)
mm = Dot(axes=[2, 2], normalize=True)([q_embed, d_embed])
show_layer_info('Dot', mm)
# compute term gating
w_g = Dense(1)(q_embed)
show_layer_info('Dense', w_g)
g = Lambda(lambda x: softmax(x, axis=1), output_shape=(self.config['text1_maxlen'], ))(w_g)
show_layer_info('Lambda-softmax', g)
g = Reshape((self.config['text1_maxlen'],))(g)
show_layer_info('Reshape', g)
mm_k = Lambda(lambda x: K.tf.nn.top_k(x, k=self.config['topk'], sorted=True)[0])(mm)
show_layer_info('Lambda-topk', mm_k)
for i in range(self.config['num_layers']):
mm_k = Dense(self.config['hidden_sizes'][i], activation='softplus', kernel_initializer='he_uniform', bias_initializer='zeros')(mm_k)
show_layer_info('Dense', mm_k)
mm_k_dropout = Dropout(rate=self.config['dropout_rate'])(mm_k)
show_layer_info('Dropout', mm_k_dropout)
mm_reshape = Reshape((self.config['text1_maxlen'],))(mm_k_dropout)
show_layer_info('Reshape', mm_reshape)
mean = Dot(axes=[1, 1])([mm_reshape, g])
show_layer_info('Dot', mean)
if self.config['target_mode'] == 'classification':
out_ = Dense(2, activation='softmax')(mean)
elif self.config['target_mode'] in ['regression', 'ranking']:
out_ = Reshape((1,))(mean)
show_layer_info('Dense', out_)
model = Model(inputs=[query, doc], outputs=out_)
return model
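# The model above produces one score per query term (mm_reshape) and aggregates them with
# the softmax term gates g via Dot(axes=[1, 1]): score = sum_i g_i * s_i. A tiny NumPy
# illustration of that aggregation (values are made up):
import numpy as np
s = np.array([0.2, 1.3, -0.4])                 # per-term matching scores
g = np.exp(np.array([0.1, 0.5, -0.2]))
g /= g.sum()                                   # softmax term gates
score = float(np.dot(s, g))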
def build(self):
def tensor_product(x):
    a = x[0]
    b = x[1]
    # batched matrix product: (i, j, k) x (i, k, l) -> (i, j, l)
    y = K.batch_dot(a, b, axes=(2, 1))
    return y
query = Input(name='query', shape=(self.config['text1_maxlen'],))
show_layer_info('Input', query)
doc = Input(name='doc', shape=(self.config['text1_maxlen'], self.config['bin_num']))
show_layer_info('Input', doc)
embedding = Embedding(self.config['vocab_size'], self.config['embed_size'], weights=[self.config['embed']], trainable = False)
q_embed = embedding(query)
show_layer_info('Embedding', q_embed)
q_w = Dense(1, kernel_initializer=self.initializer_gate, use_bias=False)(q_embed)
show_layer_info('Dense', q_w)
q_w = Lambda(lambda x: softmax(x, axis=1), output_shape=(self.config['text1_maxlen'], ))(q_w)
show_layer_info('Lambda-softmax', q_w)
z = doc
z = Dropout(rate=self.config['dropout_rate'])(z)
show_layer_info('Dropout', z)
for i in range(self.config['num_layers']-1):
z = Dense(self.config['hidden_sizes'][i], kernel_initializer=self.initializer_fc)(z)
z = Activation('tanh')(z)
show_layer_info('Dense', z)
z = Dense(self.config['hidden_sizes'][self.config['num_layers']-1], kernel_initializer=self.initializer_fc)(z)
show_layer_info('Dense', z)
z = Permute((2, 1))(z)
show_layer_info('Permute', z)
z = Reshape((self.config['text1_maxlen'],))(z)
show_layer_info('Reshape', z)
q_w = Reshape((self.config['text1_maxlen'],))(q_w)
show_layer_info('Reshape', q_w)
out_ = Dot(axes=[1, 1])([z, q_w])
if self.config['target_mode'] == 'classification':
out_ = Dense(2, activation='softmax')(out_)
show_layer_info('Dense', out_)
model = Model(inputs=[query, doc], outputs=[out_])
return model
def build(self):
def tensor_product(x):
    a = x[0]
    b = x[1]
    # batched matrix product: (i, j, k) x (i, k, l) -> (i, j, l)
    y = K.batch_dot(a, b, axes=(2, 1))
    return y
query = Input(name='query', shape=(self.config['text1_maxlen'],))
show_layer_info('Input', query)
doc = Input(name='doc', shape=(self.config['text1_maxlen'], self.config['hist_size']))
show_layer_info('Input', doc)
embedding = Embedding(self.config['vocab_size'], self.config['embed_size'], weights=[self.config['embed']], trainable = False)
q_embed = embedding(query)
show_layer_info('Embedding', q_embed)
q_w = Dense(1, kernel_initializer=self.initializer_gate, use_bias=False)(q_embed)
show_layer_info('Dense', q_w)
q_w = Lambda(lambda x: softmax(x, axis=1), output_shape=(self.config['text1_maxlen'], ))(q_w)
show_layer_info('Lambda-softmax', q_w)
z = doc
z = Dropout(rate=self.config['dropout_rate'])(z)
show_layer_info('Dropout', z)
for i in range(self.config['num_layers']-1):
z = Dense(self.config['hidden_sizes'][i], kernel_initializer=self.initializer_fc)(z)
z = Activation('tanh')(z)
show_layer_info('Dense', z)
z = Dense(self.config['hidden_sizes'][self.config['num_layers']-1], kernel_initializer=self.initializer_fc)(z)
show_layer_info('Dense', z)
z = Permute((2, 1))(z)
show_layer_info('Permute', z)
z = Reshape((self.config['text1_maxlen'],))(z)
show_layer_info('Reshape', z)
q_w = Reshape((self.config['text1_maxlen'],))(q_w)
show_layer_info('Reshape', q_w)
out_ = Dot(axes=[1, 1])([z, q_w])
if self.config['target_mode'] == 'classification':
out_ = Dense(2, activation='softmax')(out_)
show_layer_info('Dense', out_)
model = Model(inputs=[query, doc], outputs=[out_])
return model
def test_keras_model_probs(num_classes):
bounds = (0, 255)
channels = num_classes
with warnings.catch_warnings():
warnings.filterwarnings("ignore", category=DeprecationWarning)
inputs = Input(shape=(5, 5, channels))
logits = GlobalAveragePooling2D(
data_format='channels_last')(inputs)
probs = Activation(softmax)(logits)
model1 = KerasModel(
Model(inputs=inputs, outputs=logits),
bounds=bounds,
predicts='logits')
model2 = KerasModel(
Model(inputs=inputs, outputs=probs),
bounds=bounds,
predicts='probabilities')
model3 = KerasModel(
Model(inputs=inputs, outputs=probs),
bounds=bounds,
predicts='probs')
np.random.seed(22)
test_images = np.random.rand(2, 5, 5, channels).astype(np.float32)
p1 = model1.batch_predictions(test_images)
p2 = model2.batch_predictions(test_images)
p3 = model3.batch_predictions(test_images)
assert p1.shape == p2.shape == p3.shape == (2, num_classes)
np.testing.assert_array_almost_equal(
p1 - p1.max(),
p2 - p2.max(),
decimal=1)
np.testing.assert_array_almost_equal(
p2 - p2.max(),
p3 - p3.max(),
decimal=5)
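# Why the comparison above works: for a softmax output, log(softmax(z)) = z - logsumexp(z),
# so log-probabilities differ from the logits only by a per-example constant, and
# subtracting the max removes that constant. Quick NumPy check:
import numpy as np
z = np.array([1.0, 2.0, 0.5])
logp = z - np.log(np.exp(z).sum())
assert np.allclose(z - z.max(), logp - logp.max())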
def step(self, x, states):
ytm, stm = states
# repeat the hidden state to the length of the sequence
_stm = K.repeat(stm, self.timesteps)
# now multiply the weight matrix with the repeated hidden state
_Wxstm = K.dot(_stm, self.W_a)
# calculate the attention probabilities
# this relates how much other timesteps contributed to this one.
et = K.dot(activations.tanh(_Wxstm + self._uxpb),
K.expand_dims(self.V_a))
at = K.exp(et)
at_sum = K.sum(at, axis=1)
at_sum_repeated = K.repeat(at_sum, self.timesteps)
at /= at_sum_repeated # vector of size (batchsize, timesteps, 1)
# calculate the context vector
context = K.squeeze(K.batch_dot(at, self.x_seq, axes=1), axis=1)
# ~~~> calculate new hidden state
# first calculate the "r" gate:
rt = activations.sigmoid(
K.dot(ytm, self.W_r)
+ K.dot(stm, self.U_r)
+ K.dot(context, self.C_r)
+ self.b_r)
# now calculate the "z" gate
zt = activations.sigmoid(
K.dot(ytm, self.W_z)
+ K.dot(stm, self.U_z)
+ K.dot(context, self.C_z)
+ self.b_z)
# calculate the proposal hidden state:
s_tp = activations.tanh(
K.dot(ytm, self.W_p)
+ K.dot((rt * stm), self.U_p)
+ K.dot(context, self.C_p)
+ self.b_p)
# new hidden state:
st = (1-zt)*stm + zt * s_tp
yt = activations.softmax(
K.dot(ytm, self.W_o)
+ K.dot(stm, self.U_o)
+ K.dot(context, self.C_o)
+ self.b_o)
if self.return_probabilities:
return at, [yt, st]
else:
return yt, [yt, st]
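# The attention weights above are a softmax over timesteps computed by hand:
# at = exp(et) / sum_t exp(et). Minimal NumPy equivalent for one batch element
# (the timestep count is made up):
import numpy as np
et = np.random.randn(10, 1)                  # one attention score per timestep
at = np.exp(et) / np.exp(et).sum(axis=0)     # same normalisation as in step()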
def _split_and_apply_activations(self, controller_output):
""" This takes the controller output, splits it in ntm_output, read and wright adressing data.
It returns a triple of ntm_output, controller_instructions_read, controller_instructions_write.
ntm_output is a tensor, controller_instructions_read and controller_instructions_write are lists containing
the adressing instruction (k, beta, g, shift, gamma) and in case of write also the writing constructions,
consisting of an erase and an add vector.
As it is necesseary for stable results,
k and add_vector is activated via tanh, erase_vector via sigmoid (this is critical!),
shift via softmax,
gamma is sigmoided, inversed and clipped (probably not ideal)
g is sigmoided,
beta is linear (probably not ideal!) """
# splitting
ntm_output, controller_instructions_read, controller_instructions_write = tf.split(
controller_output,
np.asarray([self.output_dim,
self.read_heads * self.controller_read_head_emitting_dim,
self.write_heads * self.controller_write_head_emitting_dim]),
axis=1)
controller_instructions_read = tf.split(controller_instructions_read, self.read_heads, axis=1)
controller_instructions_write = tf.split(controller_instructions_write, self.write_heads, axis=1)
controller_instructions_read = [
tf.split(single_head_data, np.asarray([self.m_depth, 1, 1, 3, 1]), axis=1) for
single_head_data in controller_instructions_read]
controller_instructions_write = [
tf.split(single_head_data, np.asarray([self.m_depth, 1, 1, 3, 1, self.m_depth, self.m_depth]), axis=1) for
single_head_data in controller_instructions_write]
#activation
ntm_output = self.activation(ntm_output)
controller_instructions_read = [(tanh(k), hard_sigmoid(beta)+0.5, sigmoid(g), softmax(shift), 1 + 9*sigmoid(gamma)) for
(k, beta, g, shift, gamma) in controller_instructions_read]
controller_instructions_write = [
(tanh(k), hard_sigmoid(beta)+0.5, sigmoid(g), softmax(shift), 1 + 9*sigmoid(gamma), hard_sigmoid(erase_vector), tanh(add_vector)) for
(k, beta, g, shift, gamma, erase_vector, add_vector) in controller_instructions_write]
return (ntm_output, controller_instructions_read, controller_instructions_write)
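# For reference, the per-head slice sizes implied by the tf.split calls above
# (an assumption derived only from the split shapes): a read head emits
# (k, beta, g, shift, gamma) = m_depth + 1 + 1 + 3 + 1 values, and a write head
# additionally emits an erase and an add vector of length m_depth each, so
#     controller_read_head_emitting_dim  == m_depth + 6
#     controller_write_head_emitting_dim == m_depth + 6 + 2 * m_depth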