def build_model(self):
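    # Embed the source and target sequences and split them into per-timestep
    # slices of shape [batch, 1, emb_dim].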
    with tf.variable_scope("encoder"):
        source_xs = tf.nn.embedding_lookup(self.s_emb, self.source)
        source_xs = tf.split(1, self.max_size, source_xs)
    with tf.variable_scope("decoder"):
        target_xs = tf.nn.embedding_lookup(self.t_emb, self.target)
        target_xs = tf.split(1, self.max_size, target_xs)
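
    # Unroll the encoder RNN over the source sequence, projecting each
    # embedding down to the cell input size and collecting every hidden
    # state for the attention mechanism.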
    s = self.encoder.zero_state(self.batch_size, tf.float32)
    encoder_hs = []
    with tf.variable_scope("encoder"):
        for t in xrange(self.max_size):
            if t > 0: tf.get_variable_scope().reuse_variables()
            x = tf.squeeze(source_xs[t], [1])
            x = tf.matmul(x, self.s_proj_W) + self.s_proj_b
            h, s = self.encoder(x, s)
            encoder_hs.append(h)
    encoder_hs = tf.pack(encoder_hs)
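
    # Unroll the decoder: during training every step is teacher-forced with
    # the gold target token; at test time only step 0 reads from target_xs.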
    s = self.decoder.zero_state(self.batch_size, tf.float32)
    logits = []
    probs = []
    with tf.variable_scope("decoder"):
        for t in xrange(self.max_size):
            if t > 0: tf.get_variable_scope().reuse_variables()
            if not self.is_test or t == 0:
                x = tf.squeeze(target_xs[t], [1])
                x = tf.matmul(x, self.t_proj_W) + self.t_proj_b
            h_t, s = self.decoder(x, s)
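            # self.attention (defined elsewhere in the class) scores h_t
            # against all encoder states and returns the attentional vector
            # h~_t, in the style of Luong et al.'s global attention.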
            h_tld = self.attention(h_t, encoder_hs)
            oemb = tf.matmul(h_tld, self.proj_W) + self.proj_b
            logit = tf.matmul(oemb, self.proj_Wo) + self.proj_bo
            prob = tf.nn.softmax(logit)
            logits.append(logit)
            probs.append(prob)
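            # At test time, feed the greedy argmax prediction back in as the
            # next decoder input.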
            if self.is_test:
                x = tf.cast(tf.argmax(prob, 1), tf.int32)
                x = tf.nn.embedding_lookup(self.t_emb, x)
                # Project the fed-back embedding the same way as the
                # teacher-forced inputs so its size matches the cell input.
                x = tf.matmul(x, self.t_proj_W) + self.t_proj_b
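
    # Shift by one step: the logits at step t are scored against target token
    # t+1, with padded positions masked out via sequence_mask.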
    logits = logits[:-1]
    targets = tf.split(1, self.max_size, self.target)[1:]
    weights = tf.unpack(tf.sequence_mask(self.target_len - 1, self.max_size - 1,
                                         dtype=tf.float32), None, 1)
    self.loss = tf.nn.seq2seq.sequence_loss(logits, targets, weights)
    self.probs = tf.transpose(tf.pack(probs), [1, 0, 2])
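
    # Plain SGD with gradients clipped to norm 5; optimize_loss also records
    # learning-rate, loss, and gradient-norm summaries.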
    self.optim = tf.contrib.layers.optimize_loss(self.loss, None,
                                                 self.lr_init, "SGD", clip_gradients=5.,
                                                 summaries=["learning_rate", "loss", "gradient_norm"])

    tf.initialize_all_variables().run()
    self.saver = tf.train.Saver()