def __call__(self, inputs, state, scope=None):
    """Long short-term memory cell (LSTM)."""
    with tf.variable_scope(scope or type(self).__name__):  # "DilatedLSTMCell"
        # Parameters of gates are concatenated into one multiply for efficiency.
        c, h = tf.split(state, 2, axis=1)
        concat = self._linear([inputs, h], 4 * self._num_units, True)

        # i = input_gate, j = new_input, f = forget_gate, o = output_gate
        i, j, f, o = tf.split(concat, 4, axis=1)

        new_c = c * tf.sigmoid(f + self._forget_bias) + tf.sigmoid(i) * tf.tanh(j)
        new_h = tf.tanh(new_c) * tf.sigmoid(o)

        # update relevant cores; the others hold their previous value
        timestep = tf.assign_add(self._timestep, 1)
        core_to_update = tf.mod(timestep, self._cores)
        updated_h = self._hold_mask[core_to_update] * h + self._dilated_mask[core_to_update] * new_h

        return updated_h, tf.concat([new_c, updated_h], axis=1)
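# The dilation above comes entirely from the two mask tables: at each step only the
# core group selected by `core_to_update` takes the new hidden value, while the other
# groups hold their previous state. The masks themselves are not built in this excerpt;
# a minimal sketch of complementary 0/1 masks, assuming num_units splits evenly into
# `cores` groups (build_dilation_masks is a hypothetical helper, not from the source):
import numpy as np
import tensorflow as tf

def build_dilation_masks(num_units, cores):
    dilated = np.zeros((cores, num_units), dtype=np.float32)
    group = num_units // cores
    for k in range(cores):
        # Row k marks the units that get refreshed when core_to_update == k.
        dilated[k, k * group:(k + 1) * group] = 1.0
    hold = 1.0 - dilated  # complementary mask: units that keep their old value
    return tf.constant(hold), tf.constant(dilated)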
def last_conv(input, reuse=False, use_sigmoid=False, name=None):
    """Last convolutional layer of the discriminator network
    (1 filter of size 4x4, stride 1).

    Args:
      input: 4D tensor
      reuse: boolean
      use_sigmoid: boolean (False when using LSGAN)
      name: string, e.g. 'C64'
    """
    with tf.variable_scope(name, reuse=reuse):
        weights = _weights("weights",
                           shape=[4, 4, input.get_shape()[3], 1])
        biases = _biases("biases", [1])
        conv = tf.nn.conv2d(input, weights,
                            strides=[1, 1, 1, 1], padding='SAME')
        output = conv + biases
        if use_sigmoid:
            output = tf.sigmoid(output)
        return output
### Helpers
def __call__(self, inputs, state, scope=None):
    current_state = state[0]
    noise_i = state[1]
    noise_h = state[2]
    for i in range(self.depth):
        with tf.variable_scope('h_' + str(i)):
            if i == 0:
                h = tf.tanh(linear([inputs * noise_i, current_state * noise_h], self._num_units, True))
            else:
                h = tf.tanh(linear([current_state * noise_h], self._num_units, True))
        with tf.variable_scope('t_' + str(i)):
            if i == 0:
                t = tf.sigmoid(linear([inputs * noise_i, current_state * noise_h], self._num_units, True, self.forget_bias))
            else:
                t = tf.sigmoid(linear([current_state * noise_h], self._num_units, True, self.forget_bias))
        current_state = (h - current_state) * t + current_state
    return current_state, [current_state, noise_i, noise_h]
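# This is a recurrent-highway-style update: t acts as a transform gate and the state
# moves toward the candidate h via current_state + t * (h - current_state). The
# noise_i / noise_h tensors carried in the state are dropout masks that stay fixed
# across timesteps (variational dropout). Their construction is not shown here; a
# minimal sketch under that assumption (make_variational_masks is hypothetical):
import tensorflow as tf

def make_variational_masks(batch_size, input_size, num_units, keep_prob):
    # Sample one Bernoulli mask per sequence and reuse it at every timestep.
    noise_i = tf.floor(tf.random_uniform([batch_size, input_size]) + keep_prob) / keep_prob
    noise_h = tf.floor(tf.random_uniform([batch_size, num_units]) + keep_prob) / keep_prob
    return noise_i, noise_h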
def _lstm(self, input_h, input_c, input_x, reuse=False):
    with tf.variable_scope('level2_lstm', reuse=reuse):
        # Initialize TF variables from pretrained weights stored in self.model_load
        # (weight matrices are transposed to the matmul layout used below).
        w_i2h_ = np.transpose(self.model_load['/core/i2h_1/weight'][:], (1, 0))
        b_i2h_ = self.model_load['/core/i2h_1/bias'][:]
        w_h2h_ = np.transpose(self.model_load['/core/h2h_1/weight'][:], (1, 0))
        b_h2h_ = self.model_load['/core/h2h_1/bias'][:]
        w_i2h = tf.get_variable('w_i2h', initializer=w_i2h_)
        b_i2h = tf.get_variable('b_i2h', initializer=b_i2h_)
        w_h2h = tf.get_variable('w_h2h', initializer=w_h2h_)
        b_h2h = tf.get_variable('b_h2h', initializer=b_h2h_)

        input_x = tf.cast(input_x, tf.float32)
        i2h = tf.matmul(input_x, w_i2h) + b_i2h
        h2h = tf.matmul(input_h, w_h2h) + b_h2h
        all_input_sums = i2h + h2h
        reshaped = tf.reshape(all_input_sums, [-1, 4, self.H])
        n1, n2, n3, n4 = tf.unstack(reshaped, axis=1)
        in_gate = tf.sigmoid(n1)
        forget_gate = tf.sigmoid(n2)
        out_gate = tf.sigmoid(n3)
        in_transform = tf.tanh(n4)
        c = tf.multiply(forget_gate, input_c) + tf.multiply(in_gate, in_transform)
        h = tf.multiply(out_gate, tf.tanh(c))
        return c, h
# Source: Bidirectionnet_GMM_sigmod9000feat.py (project: image-text-matching, author: llltttppp)
def top_K_loss_margin(self, sentence, image, K=50, margin=0.2):
    sim_matrix = tf.matmul(sentence, image, transpose_b=True)
    s_square = tf.reduce_sum(tf.square(sentence), axis=1)
    im_square = tf.reduce_sum(tf.square(image), axis=1)
    d = 1 - tf.sigmoid(sim_matrix)
    positive = tf.stack([tf.matrix_diag_part(d)] * K, axis=1)
    length = tf.shape(d)[-1]
    dd = tf.matrix_set_diag(d, 8 * tf.ones([length]))
    flag = 8 - 7 * tf.sign(tf.nn.relu(self.sen_margin - self.sen_similarity))
    sen_loss_K, _ = tf.nn.top_k(-1.0 * dd * flag, K, sorted=False)               # note: these are negative values
    im_loss_K, _ = tf.nn.top_k(-tf.transpose(1.0 * dd * flag), K, sorted=False)  # note: these are negative values
    sentence_center_loss = -tf.log(1 - positive + 1e-12) - tf.log(-sen_loss_K + 1e-12)
    image_center_loss = -tf.log(1 - positive + 1e-12) - tf.log(-im_loss_K + 1e-12)
    self.d_neg = tf.reduce_mean((sen_loss_K + im_loss_K) / -2.0)
    self.d_pos = tf.reduce_mean(positive)
    self.endpoint['debug/im_loss_topK'] = -1.0 * im_loss_K
    self.endpoint['debug/sen_loss_topK'] = -1.0 * sen_loss_K
    self.endpoint['debug/d_Matrix'] = d
    self.endpoint['debug/positive'] = positive
    self.endpoint['debug/s_center_loss'] = sentence_center_loss
    self.endpoint['debug/i_center_loss'] = image_center_loss
    self.endpoint['debug/S'] = sim_matrix
    self.endpoint['debug/sentence_square'] = s_square
    self.endpoint['debug/image_square'] = im_square
    return tf.reduce_sum(sentence_center_loss), tf.reduce_sum(image_center_loss)
def lstm_func(x, h, c, wx, wh, b):
    """
    x: (N, D)
    h: (N, H)
    c: (N, H)
    wx: (D, 4H)
    wh: (H, 4H)
    b: (4H, )
    """
    N, H = tf.shape(h)[0], tf.shape(h)[1]
    a = tf.reshape(tf.matmul(x, wx) + tf.matmul(h, wh) + b, (N, -1, H))
    i, f, o, g = a[:, 0, :], a[:, 1, :], a[:, 2, :], a[:, 3, :]
    i = tf.sigmoid(i)
    f = tf.sigmoid(f)
    o = tf.sigmoid(o)
    g = tf.tanh(g)
    next_c = f * c + i * g
    next_h = o * tf.tanh(next_c)
    return next_h, next_c
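# The docstring above spells out the fused-gate layout: the four gate pre-activations
# share one (N, 4H) matmul and are split afterwards. A minimal usage sketch with
# assumed sizes (N=2, D=8, H=16; the variable names below are placeholders):
import tensorflow as tf

N, D, H = 2, 8, 16
x = tf.random_normal([N, D])
h0 = tf.zeros([N, H])
c0 = tf.zeros([N, H])
wx = tf.get_variable("wx_demo", [D, 4 * H])
wh = tf.get_variable("wh_demo", [H, 4 * H])
b = tf.zeros([4 * H])
next_h, next_c = lstm_func(x, h0, c0, wx, wh, b)  # both come back as (N, H)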
def generator_graph(fake_imgs, units_size, out_size, alpha=0.01):
    # Generator network; its own variable scope keeps it separate from the discriminator.
    with tf.variable_scope('generator'):
        # Fully connected hidden layer
        layer = tf.layers.dense(fake_imgs, units_size)
        # Leaky ReLU activation
        relu = tf.maximum(alpha * layer, layer)
        # Dropout to reduce overfitting
        drop = tf.layers.dropout(relu, rate=0.2)
        # logits; out_size is the size of the generated output
        logits = tf.layers.dense(drop, out_size)
        # Use tanh rather than sigmoid for the output activation:
        # tanh maps to (-1, 1) while sigmoid maps to [0, 1].
        outputs = tf.tanh(logits)
        return logits, outputs
def make_dcgan_generator(Xk_g, n_lat, n_chan=1):
    n_g_hid1 = 1024  # size of hidden layer in generator layer 1
    n_g_hid2 = 128   # size of hidden layer in generator layer 2

    x = Dense(n_g_hid1)(Xk_g)
    x = BatchNormalization(mode=2)(x)
    x = Activation('relu')(x)

    x = Dense(n_g_hid2 * 7 * 7)(x)
    x = BatchNormalization(mode=2)(x)
    x = Activation('relu')(x)
    x = Reshape((n_g_hid2, 7, 7))(x)

    x = Deconvolution2D(64, 5, 5, output_shape=(128, 64, 14, 14),
                        border_mode='same', activation=None, subsample=(2, 2),
                        init='orthogonal', dim_ordering='th')(x)
    x = BatchNormalization(mode=2, axis=1)(x)
    x = Activation('relu')(x)

    g = Deconvolution2D(n_chan, 5, 5, output_shape=(128, n_chan, 28, 28),
                        border_mode='same', activation='sigmoid', subsample=(2, 2),
                        init='orthogonal', dim_ordering='th')(x)

    return g
def silu(_x):
    return _x * tf.sigmoid(_x)
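# SiLU (a.k.a. swish) is simply x * sigmoid(x). The helper above matches the activation
# that newer TensorFlow releases expose as tf.nn.swish (and tf.nn.silu in recent 2.x
# versions); a quick sanity check, assuming eager TF 2.x is available:
import numpy as np
import tensorflow as tf

x = tf.constant(np.linspace(-4.0, 4.0, 9), dtype=tf.float32)
print(silu(x))          # x * sigmoid(x)
print(tf.nn.swish(x))   # should match the line above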
def create_model(self, model_input, vocab_size, num_frames, **unused_params):
    """Creates a model which uses a logistic classifier over the average of the
    frame-level features.

    This class is intended to be an example for implementors of frame-level
    models. If you want to train a model over averaged features, it is more
    efficient to average them beforehand rather than on the fly.

    Args:
      model_input: A 'batch_size' x 'max_frames' x 'num_features' matrix of
        input features.
      vocab_size: The number of classes in the dataset.
      num_frames: A vector of length 'batch' which indicates the number of
        frames for each video (before padding).

    Returns:
      A dictionary with a tensor containing the probability predictions of the
      model in the 'predictions' key. The dimensions of the tensor are
      'batch_size' x 'num_classes'.
    """
    num_frames = tf.cast(tf.expand_dims(num_frames, 1), tf.float32)
    feature_size = model_input.get_shape().as_list()[2]
    max_frames = model_input.get_shape().as_list()[1]

    denominators = tf.reshape(
        tf.tile(num_frames, [1, feature_size]), [-1, feature_size])
    avg_pooled = tf.reduce_sum(model_input,
                               axis=[1]) / denominators

    output = slim.fully_connected(
        avg_pooled, vocab_size, activation_fn=tf.nn.sigmoid,
        weights_regularizer=slim.l2_regularizer(1e-8))
    return {"predictions": output}
def sub_moe(self,
            model_input,
            vocab_size,
            num_mixtures=None,
            l2_penalty=1e-8,
            scopename="",
            **unused_params):
    num_mixtures = num_mixtures or FLAGS.moe_num_mixtures

    gate_activations = slim.fully_connected(
        model_input,
        vocab_size * (num_mixtures + 1),
        activation_fn=None,
        biases_initializer=None,
        weights_regularizer=slim.l2_regularizer(l2_penalty),
        scope="gates" + scopename)
    expert_activations = slim.fully_connected(
        model_input,
        vocab_size * num_mixtures,
        activation_fn=None,
        weights_regularizer=slim.l2_regularizer(l2_penalty),
        scope="experts" + scopename)

    gating_distribution = tf.nn.softmax(tf.reshape(
        gate_activations,
        [-1, num_mixtures + 1]))  # (Batch * #Labels) x (num_mixtures + 1)
    expert_distribution = tf.nn.sigmoid(tf.reshape(
        expert_activations,
        [-1, num_mixtures]))  # (Batch * #Labels) x num_mixtures

    final_probabilities_by_class_and_batch = tf.reduce_sum(
        gating_distribution[:, :num_mixtures] * expert_distribution, 1)
    final_probabilities = tf.reshape(final_probabilities_by_class_and_batch,
                                     [-1, vocab_size])
    return model_input, final_probabilities
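# Per class, the mixture combines num_mixtures sigmoid experts with softmax gate
# weights; the extra (num_mixtures + 1)-th gate has no expert, so it acts as a learned
# "abstain" mass that can pull the probability toward zero. A tiny NumPy check of the
# same combination rule with made-up numbers (not from the source):
import numpy as np

def moe_combine(gate_logits, expert_logits):
    gates = np.exp(gate_logits) / np.exp(gate_logits).sum()   # softmax over num_mixtures + 1
    experts = 1.0 / (1.0 + np.exp(-expert_logits))            # sigmoid over num_mixtures
    return float(np.sum(gates[:-1] * experts))                # dummy gate contributes nothing

print(moe_combine(np.array([0.2, 1.0, -0.5]), np.array([2.0, -1.0])))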
def sub_model(self, model_input, vocab_size, num_mixtures=None,
              l2_penalty=1e-8, sub_scope="", distill_labels=None, **unused_params):
    num_mixtures = num_mixtures or FLAGS.moe_num_mixtures
    class_size = 256

    if distill_labels is not None:
        class_input = slim.fully_connected(
            distill_labels,
            class_size,
            activation_fn=tf.nn.relu,
            weights_regularizer=slim.l2_regularizer(l2_penalty),
            scope="class_inputs")
        class_input = tf.nn.l2_normalize(class_input, dim=1)
        model_input = tf.concat((model_input, class_input), axis=1)

    gate_activations = slim.fully_connected(
        model_input,
        vocab_size * (num_mixtures + 1),
        activation_fn=None,
        biases_initializer=None,
        weights_regularizer=slim.l2_regularizer(l2_penalty),
        scope="gates-" + sub_scope)
    expert_activations = slim.fully_connected(
        model_input,
        vocab_size * num_mixtures,
        activation_fn=None,
        weights_regularizer=slim.l2_regularizer(l2_penalty),
        scope="experts-" + sub_scope)

    gating_distribution = tf.nn.softmax(tf.reshape(
        gate_activations,
        [-1, num_mixtures + 1]))  # (Batch * #Labels) x (num_mixtures + 1)
    expert_distribution = tf.nn.sigmoid(tf.reshape(
        expert_activations,
        [-1, num_mixtures]))  # (Batch * #Labels) x num_mixtures

    final_probabilities_by_class_and_batch = tf.reduce_sum(
        gating_distribution[:, :num_mixtures] * expert_distribution, 1)
    final_probabilities = tf.reshape(final_probabilities_by_class_and_batch,
                                     [-1, vocab_size])
    return final_probabilities
def decoder(z, reuse=False):
    with tf.variable_scope('decoder') as vs:
        if reuse:
            vs.reuse_variables()
        fc1 = fc_relu(z, 1024)
        fc2 = fc_relu(fc1, 7 * 7 * 128)
        fc2 = tf.reshape(fc2, tf.stack([tf.shape(fc2)[0], 7, 7, 128]))
        conv1 = conv2d_t_relu(fc2, 64, 4, 2)
        output = tf.contrib.layers.convolution2d_transpose(conv1, 1, 4, 2, activation_fn=tf.sigmoid)
        return output
def encoder(x, z_dim):
    with tf.variable_scope('encoder'):
        conv1 = conv2d_lrelu(x, 64, 4, 2)       # None x 14 x 14 x 64
        conv2 = conv2d_lrelu(conv1, 128, 4, 2)  # None x 7 x 7 x 128
        conv2 = tf.reshape(conv2, [-1, np.prod(conv2.get_shape().as_list()[1:])])  # None x (7*7*128)
        fc1 = fc_lrelu(conv2, 1024)
        mean = tf.contrib.layers.fully_connected(fc1, z_dim, activation_fn=tf.identity)
        stddev = tf.contrib.layers.fully_connected(fc1, z_dim, activation_fn=tf.sigmoid)
        stddev = tf.maximum(stddev, 0.005)
        return mean, stddev
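# The encoder parameterizes a diagonal Gaussian with a sigmoid-bounded, floored standard
# deviation. How the (mean, stddev) pair is consumed is not shown in this excerpt; a
# minimal reparameterization-sampling sketch under assumed shapes (x and z_dim=20 are
# placeholders, not from the source):
import tensorflow as tf

x = tf.placeholder(tf.float32, [None, 28, 28, 1])  # assumed 28x28 grayscale input
mean, stddev = encoder(x, z_dim=20)
eps = tf.random_normal(tf.shape(stddev))
z = mean + stddev * eps                            # differentiable sample from N(mean, stddev^2)
# z can then be fed to the matching decoder to reconstruct x.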
def decoder(z, reuse=False):
    with tf.variable_scope('decoder') as vs:
        if reuse:
            vs.reuse_variables()
        fc1 = fc_relu(z, 1024)
        fc2 = fc_relu(fc1, 7 * 7 * 128)
        fc2 = tf.reshape(fc2, tf.stack([tf.shape(fc2)[0], 7, 7, 128]))
        conv1 = conv2d_t_relu(fc2, 64, 4, 2)
        mean = tf.contrib.layers.convolution2d_transpose(conv1, 1, 4, 2, activation_fn=tf.sigmoid)
        stddev = tf.contrib.layers.convolution2d_transpose(conv1, 1, 4, 2, activation_fn=tf.sigmoid)
        stddev = tf.maximum(stddev, 0.005)
        return mean, stddev

# Build the computation graph for training
def __call__(self, inputs, state, scope=None):
    with tf.variable_scope(scope or type(self).__name__):
        c_prev, h_prev, update_prob_prev, cum_update_prob_prev = state

        # Parameters of gates are concatenated into one multiply for efficiency.
        concat = rnn_ops.linear([inputs, h_prev], 4 * self._num_units, True)

        # i = input_gate, j = new_input, f = forget_gate, o = output_gate
        i, j, f, o = tf.split(value=concat, num_or_size_splits=4, axis=1)

        if self._layer_norm:
            i = rnn_ops.layer_norm(i, name="i")
            j = rnn_ops.layer_norm(j, name="j")
            f = rnn_ops.layer_norm(f, name="f")
            o = rnn_ops.layer_norm(o, name="o")

        new_c_tilde = (c_prev * tf.sigmoid(f + self._forget_bias) + tf.sigmoid(i) * self._activation(j))
        new_h_tilde = self._activation(new_c_tilde) * tf.sigmoid(o)

        # Compute value for the update prob
        with tf.variable_scope('state_update_prob'):
            new_update_prob_tilde = rnn_ops.linear(new_c_tilde, 1, True, bias_start=self._update_bias)
            new_update_prob_tilde = tf.sigmoid(new_update_prob_tilde)

        # Compute value for the update gate
        cum_update_prob = cum_update_prob_prev + tf.minimum(update_prob_prev, 1. - cum_update_prob_prev)
        update_gate = _binary_round(cum_update_prob)

        # Apply update gate
        new_c = update_gate * new_c_tilde + (1. - update_gate) * c_prev
        new_h = update_gate * new_h_tilde + (1. - update_gate) * h_prev
        new_update_prob = update_gate * new_update_prob_tilde + (1. - update_gate) * update_prob_prev
        new_cum_update_prob = update_gate * 0. + (1. - update_gate) * cum_update_prob

        new_state = SkipLSTMStateTuple(new_c, new_h, new_update_prob, new_cum_update_prob)
        new_output = SkipLSTMOutputTuple(new_h, update_gate)

        return new_output, new_state
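# _binary_round is referenced above but not defined in this excerpt. In the Skip-RNN
# formulation it rounds the cumulative update probability to {0, 1} in the forward pass
# while letting gradients pass through unchanged (a straight-through estimator). A
# minimal sketch of such a helper, assuming TF 1.x graph mode:
def _binary_round(x):
    g = tf.get_default_graph()
    with g.gradient_override_map({"Round": "Identity"}):
        return tf.round(x)  # forward: hard 0/1; backward: identity gradient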
def __call__(self, inputs, state, scope=None):
    with tf.variable_scope(scope or type(self).__name__):
        h_prev, update_prob_prev, cum_update_prob_prev = state

        # Parameters of gates are concatenated into one multiply for efficiency.
        with tf.variable_scope("gates"):
            concat = rnn_ops.linear([inputs, h_prev], 2 * self._num_units, bias=True, bias_start=1.0)

            # r = reset_gate, u = update_gate
            r, u = tf.split(value=concat, num_or_size_splits=2, axis=1)
            if self._layer_norm:
                r = rnn_ops.layer_norm(r, name="r")
                u = rnn_ops.layer_norm(u, name="u")

            # Apply non-linearity after layer normalization
            r = tf.sigmoid(r)
            u = tf.sigmoid(u)

        with tf.variable_scope("candidate"):
            new_c_tilde = self._activation(rnn_ops.linear([inputs, r * h_prev], self._num_units, True))
        new_h_tilde = u * h_prev + (1 - u) * new_c_tilde

        # Compute value for the update prob
        with tf.variable_scope('state_update_prob'):
            new_update_prob_tilde = rnn_ops.linear(new_h_tilde, 1, True, bias_start=self._update_bias)
            new_update_prob_tilde = tf.sigmoid(new_update_prob_tilde)

        # Compute value for the update gate
        cum_update_prob = cum_update_prob_prev + tf.minimum(update_prob_prev, 1. - cum_update_prob_prev)
        update_gate = _binary_round(cum_update_prob)

        # Apply update gate
        new_h = update_gate * new_h_tilde + (1. - update_gate) * h_prev
        new_update_prob = update_gate * new_update_prob_tilde + (1. - update_gate) * update_prob_prev
        new_cum_update_prob = update_gate * 0. + (1. - update_gate) * cum_update_prob

        new_state = SkipGRUStateTuple(new_h, new_update_prob, new_cum_update_prob)
        new_output = SkipGRUOutputTuple(new_h, update_gate)

        return new_output, new_state
def __init__(self, logits):
    self.logits = logits
    self.ps = tf.sigmoid(logits)
def _step(self, f, z, o):
    with tf.variable_scope("fo-Pool"):
        # f, z, o are batch_size x size
        f = tf.sigmoid(f)
        z = tf.tanh(z)
        o = tf.sigmoid(o)
        self.c = tf.multiply(f, self.c) + tf.multiply(1 - f, z)
        self.h = tf.multiply(o, self.c)  # h is a size-dimensional vector
        return self.h
def __discriminator(self, x, scope, reuse, hidden_units):
    with tf.variable_scope(scope, reuse=reuse):
        h1 = tf.layers.dense(x, hidden_units, activation=None)
        h1 = LeakyReLU(h1, self.alpha)
        logits = tf.layers.dense(h1, 1, activation=None)
        out = tf.sigmoid(logits)
        return out, logits
#---------------------------------------------------------------------------
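# The discriminator returns both the sigmoid output and the raw logits; the logits are
# what the loss should consume, since tf.nn.sigmoid_cross_entropy_with_logits is more
# numerically stable than taking log(sigmoid(.)). A minimal usage sketch inside the same
# class (real_x, fake_x and hidden_units=128 are placeholders, not from the source):
d_real_out, d_real_logits = self.__discriminator(real_x, 'discriminator', reuse=False, hidden_units=128)
d_fake_out, d_fake_logits = self.__discriminator(fake_x, 'discriminator', reuse=True, hidden_units=128)

d_loss = tf.reduce_mean(
    tf.nn.sigmoid_cross_entropy_with_logits(logits=d_real_logits, labels=tf.ones_like(d_real_logits)) +
    tf.nn.sigmoid_cross_entropy_with_logits(logits=d_fake_logits, labels=tf.zeros_like(d_fake_logits)))
g_loss = tf.reduce_mean(
    tf.nn.sigmoid_cross_entropy_with_logits(logits=d_fake_logits, labels=tf.ones_like(d_fake_logits)))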