def __call__(self, inputs, state, scope=None):
    """Run one step of the gated recurrent unit (GRU) with num_units cells.

    Args:
        inputs: input for this step; fed to `utils.linear` together with
            the hidden state.
        state: previous state. When `self.pretanh` is set, only its first
            `self.num_units` columns are used as the hidden state.
        scope: optional variable scope; defaults to the class name.

    Returns:
        A `(new_h, new_state)` pair. `new_state` is `new_h` itself, or,
        when `self.pretanh` is set, `new_h` concatenated along axis 1 with
        the candidate pre-activation.
    """
    with tf.variable_scope(scope or type(self).__name__):  # "GRUCell"
        # In pretanh mode the incoming state has extra columns after the
        # hidden state; keep only the hidden part.
        prev_h = state[:, :self.num_units] if self.pretanh else state

        with tf.variable_scope("Gates"):  # Reset gate and update gate.
            # The gate bias starts at 1.0 so that the cell initially
            # neither resets nor updates.
            gate_logits = utils.linear([inputs, prev_h], 2 * self.num_units, True, 1.0)
            reset_logits, update_logits = tf.split(1, 2, gate_logits)
            reset_gate = tf.nn.sigmoid(reset_logits)
            update_gate = tf.nn.sigmoid(update_logits)

        with tf.variable_scope("Candidate"):
            preact = utils.linear([inputs, reset_gate * prev_h], self.num_units, True)
            candidate = self.activation(preact)

        # Blend the previous hidden state and the candidate with the update gate.
        new_h = update_gate * prev_h + (1 - update_gate) * candidate
        if self.pretanh:
            new_state = tf.concat(1, [new_h, preact])
        else:
            new_state = new_h
    return new_h, new_state
# Example source code using Python's linear()
def _discriminator_conv(self, states):
    '''Convolve output of bidirectional RNN and predict the discriminator label.

    The states are convolved with a kernel spanning `cfg.d_conv_window`
    positions along axis 1, summed over axis 1, passed through ELU, scaled
    by 0.1 and finally projected to a single value per row by `utils.linear`.
    '''
    num_filters = cfg.hidden_size // cfg.d_conv_window
    with tf.variable_scope("Discriminator"):
        # The channel count is read before the extra axis is inserted below.
        in_channels = states.get_shape()[2]
        kernel = tf.get_variable(
            'W_conv',
            [cfg.d_conv_window, 1, in_channels, num_filters],
            initializer=tf.contrib.layers.xavier_initializer_conv2d())
        bias = tf.get_variable(
            'b_conv',
            [num_filters],
            initializer=tf.constant_initializer(0.0))

        # Insert a singleton axis so the 3-D states can go through conv2d.
        expanded = tf.expand_dims(states, 2)
        conv = tf.nn.conv2d(expanded, kernel, strides=[1, 1, 1, 1], padding='SAME')
        # NOTE(review): the leading dimension is fixed to 2 * cfg.batch_size;
        # presumably two batches are stacked by the caller -- TODO confirm.
        conv = tf.reshape(conv, [2 * cfg.batch_size, -1, num_filters])
        conv = tf.nn.bias_add(conv, bias)

        pooled = tf.nn.elu(tf.reduce_sum(conv, [1])) * 1e-1
        return utils.linear(pooled, 1, True, 0.0, scope='discriminator_output')
def __call__(self, inputs, state, scope=None):
    """Run one step of the cell whose state is an `(h, n, d)` triple.

    Args:
        inputs: input for this step.
        state: triple `(h, n, d)`: previous output, running numerator and
            running denominator.
        scope: optional variable scope; defaults to "rwa_cell".

    Returns:
        `(h_new, new_state)` where `new_state` is
        `RDACellTuple(h_new, n, d)` holding the updated numerator and
        denominator.
    """
    def project(args):
        # Every projection in this cell has the same width and settings.
        return linear(args, self._num_units, True, normalize=self._normalize)

    with _checked_scope(self, scope or "rwa_cell", reuse=self._reuse):
        h, numer, denom = state

        with vs.variable_scope("u"):
            u = project(inputs)
        with vs.variable_scope("g"):
            g = project([inputs, h])
        with vs.variable_scope("a"):
            # NOTE(review): the original comment says the bias term cancels
            # between numerator and denominator, yet this projection is
            # called with the same `True` flag as the others -- confirm.
            a = tf.exp(project([inputs, h]))
        with vs.variable_scope("discount_factor"):
            discount_factor = tf.nn.sigmoid(project([inputs, h]))

        z = tf.multiply(u, tanh(g))
        # Decay both accumulators by the discount factor, then add the new term.
        numer = tf.multiply(numer, discount_factor) + tf.multiply(z, a)
        denom = tf.multiply(denom, discount_factor) + a
        h_new = self._activation(tf.div(numer, denom))
        new_state = RDACellTuple(h_new, numer, denom)
    return h_new, new_state
def __call__(self, inputs, state, scope=None):
    """Run one step of the recurrent additive network (RAN) cell.

    Args:
        inputs: input for this step.
        state: pair `(c, h)` with the previous cell state and hidden state.
        scope: optional variable scope; defaults to "ran_cell".

    Returns:
        `(output, new_state)` where `output` is the new hidden state and
        `new_state` is `tf.contrib.rnn.LSTMStateTuple(new_c, new_h)`.
    """
    with _checked_scope(self, scope or "ran_cell", reuse=self._reuse):
        with vs.variable_scope("gates"):
            c, h = state
            # Input and forget gates come from one joint projection of
            # the input and the previous hidden state.
            gates = tf.nn.sigmoid(
                linear([inputs, h], 2 * self._num_units, True, normalize=self._normalize))
            i, f = array_ops.split(value=gates, num_or_size_splits=2, axis=1)

        with vs.variable_scope("candidate"):
            # The candidate content depends on the input only.
            content = linear([inputs], self._num_units, True, normalize=self._normalize)

        new_c = i * content + f * c
        # Fix: derive the hidden state from the updated cell state. The
        # previous code applied the activation to the old `c`, so the
        # output lagged one step behind the cell.
        new_h = self._activation(new_c)
        new_state = tf.contrib.rnn.LSTMStateTuple(new_c, new_h)
        output = new_h
    return output, new_state
def __call__(self, inputs, state, scope=None):
    """Run one step of the recurrent additive network (RAN) cell.

    This variant keeps a single tensor as its state (the cell state) and
    computes the gates from that state and the input.

    Args:
        inputs: input for this step.
        state: previous cell state.
        scope: optional variable scope; defaults to "ran_cell".

    Returns:
        `(new_h, new_c)`: the activated new cell state as output and the
        new cell state as the next state.
    """
    with _checked_scope(self, scope or "ran_cell", reuse=self._reuse):
        with vs.variable_scope("gates"):
            value = tf.nn.sigmoid(
                linear([state, inputs], 2 * self._num_units, True, normalize=self._normalize))
            i, f = array_ops.split(value=value, num_or_size_splits=2, axis=1)

        with vs.variable_scope("candidate"):
            # The candidate content depends on the input only.
            c = linear([inputs], self._num_units, True, normalize=self._normalize)

        new_c = i * c + f * state
        # Fix: the output must be computed from the updated cell state.
        # The previous code activated the candidate `c`, which made the
        # output independent of the recurrent state and of both gates.
        new_h = self._activation(new_c)
    return new_h, new_c
def discriminator(self, img, cond, reuse):
    """Build the conditional discriminator and return its single output.

    Args:
        img: image tensor to judge.
        cond: conditioning tensor, concatenated to `img` on the last axis.
        reuse: forwarded to `tf.variable_scope` for the "disc" scope.

    Returns:
        The result of the final `linear` call.
    """
    last_axis = len(img.get_shape()) - 1
    with tf.variable_scope("disc", reuse=reuse):
        net = tf.concat([img, cond], last_axis)
        base_channels = conf.conv_channel_base

        # The first convolution has no batch normalisation.
        net = lrelu(conv2d(net, base_channels, name="h0"))
        # Three further convolutions, with 2x, 4x and 8x the base channel count.
        for index, multiplier in enumerate((2, 4, 8), 1):
            layer_name = "h%d" % index
            net = lrelu(batch_norm(
                conv2d(net, base_channels * multiplier, name=layer_name), layer_name))

        # NOTE(review): reshaping to [1, -1] folds every element of the
        # feature map, batch axis included, into one row -- confirm that
        # this is intended.
        flat = tf.reshape(net, [1, -1])
        return linear(flat, 1, "linear")
def encoder(self, inputs):
    '''Encode sentence and return a latent representation in MLE mode.

    Runs a uni- or bidirectional RNN (chosen by `cfg.enc_bidirect`) over
    `inputs`, transforms the per-step states, sums them over axis 1 and
    returns the `(z_mean, z_logvar)` projections of the result.
    '''
    def make_cell():
        return self.rnn_cell(cfg.num_layers, cfg.hidden_size, return_states=True)

    with tf.variable_scope("Encoder"):
        if not cfg.enc_bidirect:
            rnn_out, _ = tf.nn.dynamic_rnn(make_cell(), inputs, swap_memory=True,
                                           dtype=tf.float32)
            # Wrap in a tuple to match the bidirectional RNN's output format.
            outputs = (rnn_out,)
        else:
            fw_cell = make_cell()
            bw_cell = make_cell()
            outputs, _ = tf.nn.bidirectional_dynamic_rnn(fw_cell, bw_cell, inputs,
                                                         sequence_length=self.lengths,
                                                         swap_memory=True,
                                                         dtype=tf.float32)

        per_direction = []
        for rnn_out in outputs:
            top = rnn_out[:, :, :cfg.hidden_size]
            lower = rnn_out[:, :, cfg.hidden_size:]
            # For GRU the last layer's states were skipped because they are
            # the outputs; put the remaining states first, then the output.
            per_direction.append(tf.concat(2, [lower, top]))

        # Concatenated states from the forward and backward RNNs.
        merged = tf.concat(2, per_direction)
        flat = tf.reshape(merged, [-1, cfg.hidden_size * len(outputs)])
        flat = utils.linear(flat, cfg.latent_size, True, 0.0, scope='states_transform1')
        flat = utils.highway(flat, f=tf.nn.elu)
        flat = utils.linear(flat, cfg.latent_size, True, 0.0, scope='states_transform2')

        # Back to one row group per batch element, then sum over axis 1.
        per_example = tf.reshape(flat, [cfg.batch_size, -1, cfg.latent_size])
        latent = tf.nn.elu(tf.reduce_sum(per_example, [1])) * 1e-1

        z_mean = utils.linear(latent, cfg.latent_size, True, 0.0, scope='Latent_mean')
        z_logvar = utils.linear(latent, cfg.latent_size, True, 0.0, scope='Latent_logvar')
    return z_mean, z_logvar
def discriminator_finalstate(self, states):  # FIXME
    '''Discriminator that operates on the final states of the sentences.

    The last position along axis 1 of `states` is concatenated with
    `self.latent` and passed through two ELU layers and a final projection
    to a single output.
    '''
    with tf.variable_scope("Discriminator"):
        # An index of lengths - 2 would account for the generated output
        # skipping <sos>; currently the last position is simply taken:
        #   final_states = utils.rowwise_lookup(states, self.lengths - 2)
        last_states = states[:, -1, :]
        features = tf.concat(1, [self.latent, last_states])  # TODO transform latent

        hidden1 = utils.linear(features, cfg.hidden_size, True, 0.0,
                               scope='discriminator_lin1')
        hidden1 = tf.nn.elu(hidden1)
        hidden2 = utils.linear(hidden1, cfg.hidden_size // 2, True, 0.0,
                               scope='discriminator_lin2')
        hidden2 = tf.nn.elu(hidden2)

        return utils.linear(hidden2, 1, True, 0.0, scope='discriminator_output')
def __call__(self, inputs, state, scope=None):
    """Run one step of the recurrent weighted average (RWA) cell.

    Args:
        inputs: input for this step.
        state: 4-tuple `(h, n, d, a_max)`: previous output, running
            numerator, running denominator and the running maximum of the
            attention logits `a`.
        scope: optional variable scope; defaults to "rwa_cell".

    Returns:
        `(h_new, new_state)` where `new_state` is
        `RWACellTuple(h_new, n_new, d_new, a_newmax)`.
    """
    with _checked_scope(self, scope or "rwa_cell", reuse=self._reuse):
        h, n, d, a_max = state
        with vs.variable_scope("u"):
            u = linear(inputs, self._num_units, True, normalize=self._normalize)
        with vs.variable_scope("g"):
            g = linear([inputs, h], self._num_units, True, normalize=self._normalize)
        with vs.variable_scope("a"):
            # The bias term, when factored out of the numerator and
            # denominator, cancels and is unnecessary.
            a = linear([inputs, h], self._num_units, False, normalize=self._normalize)

        z = tf.multiply(u, tanh(g))

        # Track the running maximum of `a` and rescale by it, so that only
        # exponentials of non-positive numbers are ever taken.
        a_newmax = tf.maximum(a_max, a)
        exp_diff = tf.exp(a_max - a_newmax)
        exp_scaled = tf.exp(a - a_newmax)

        # Numerically stable update of numerator and denominator.
        n_new = tf.multiply(n, exp_diff) + tf.multiply(z, exp_scaled)
        d_new = tf.multiply(d, exp_diff) + exp_scaled

        # Fix: the output is the ratio of the UPDATED accumulators. The
        # previous code divided the stale `n` by `d`, which lags one step
        # behind and is 0/0 whenever the incoming accumulators are zero.
        h_new = self._activation(tf.div(n_new, d_new))
        new_state = RWACellTuple(h_new, n_new, d_new, a_newmax)
    return h_new, new_state
def decoder(self, inputs, mle_mode, reuse=None):
    '''Use the latent representation and word inputs to predict next words.

    Args:
        inputs: decoder inputs; indexed as a 3-D tensor below.
        mle_mode: when true, `inputs` are fed to the RNN with the latent
            vector appended and nothing is generated; otherwise the cell is
            built with the embedding and softmax parameters and the words
            are read back from its outputs.
        reuse: forwarded to the "Decoder" variable scope.

    Returns:
        `(output, states, generated)`; `generated` is None in MLE mode.
    '''
    def tile_latent_like(reference):
        # Repeat the latent vector along axis 1 to the length of `reference`.
        return tf.tile(tf.expand_dims(latent, 1),
                       tf.pack([1, tf.shape(reference)[1], 1]))

    with tf.variable_scope("Decoder", reuse=reuse):
        latent = utils.highway(self.latent, layer_size=2, f=tf.nn.elu)
        latent = utils.linear(latent, cfg.latent_size, True, 0.0, scope='Latent_transform')
        self.latent_transformed = latent

        # One initial state per layer: tanh activation followed by its
        # pre-activation, concatenated along axis 1.
        layer_states = []
        for layer in range(cfg.num_layers):
            preact = utils.linear(latent, cfg.hidden_size, True, 0.0,
                                  scope='Latent_initial%d' % layer)
            act = tf.nn.tanh(preact)
            layer_states.append(tf.concat(1, [act, preact]))

        if mle_mode:
            inputs = tf.concat(2, [inputs, tile_latent_like(inputs)])
            cell = self.rnn_cell(cfg.num_layers, cfg.hidden_size, return_states=True,
                                 pretanh=True)
            self.decode_cell = cell
        else:
            cell = self.rnn_cell(cfg.num_layers, cfg.hidden_size, latent, self.embedding,
                                 self.softmax_w, self.softmax_b, return_states=True,
                                 pretanh=True, get_embeddings=cfg.concat_inputs)

        initial_state = cell.initial_state(layer_states)
        if mle_mode:
            self.decode_initial = initial_state

        outputs, _ = tf.nn.dynamic_rnn(cell, inputs, initial_state=initial_state,
                                       swap_memory=True, dtype=tf.float32)
        output = outputs[:, :, :cfg.hidden_size]

        # `skip` counts the extra features that sit between the output and
        # the states in the RNN output.
        generated = None
        skip = 0
        if not mle_mode:
            # The feature right after the output holds the word, stored as a float.
            word_slice = outputs[:, :-1, cfg.hidden_size:cfg.hidden_size+1]
            words = tf.squeeze(tf.cast(word_slice, tf.int32), [-1])
            eos_column = tf.constant(self.vocab.eos_index, shape=[cfg.batch_size, 1])
            generated = tf.stop_gradient(tf.concat(1, [words, eos_column]))
            skip = 1
            if cfg.concat_inputs:
                embeddings = outputs[:, :, cfg.hidden_size+1:cfg.hidden_size+1+cfg.emb_size]
                # Shift the embeddings one step to the right, starting with
                # the first input.
                embeddings = tf.concat(1, [inputs[:, :1, :], embeddings[:, :-1, :]])
                embeddings = tf.concat(2, [embeddings, tile_latent_like(embeddings)])
                skip += cfg.emb_size

        states = outputs[:, :, cfg.hidden_size+skip:]
        if cfg.concat_inputs:
            extra = inputs if mle_mode else embeddings
            states = tf.concat(2, [states, extra])
    return output, states, generated