def build_model(self):
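    # Embed the source and target sequences and split them into per-timestep
    # slices of shape [batch, 1, emb_dim].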
    with tf.variable_scope("encoder"):
        source_xs = tf.nn.embedding_lookup(self.s_emb, self.source)
        source_xs = tf.split(1, self.max_size, source_xs)
    with tf.variable_scope("decoder"):
        target_xs = tf.nn.embedding_lookup(self.t_emb, self.target)
        target_xs = tf.split(1, self.max_size, target_xs)
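
    # Unroll the encoder RNN over the source sequence, projecting each
    # embedding down to the cell input size and collecting every hidden
    # state for the attention mechanism.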
    s = self.encoder.zero_state(self.batch_size, tf.float32)
    encoder_hs = []
    with tf.variable_scope("encoder"):
        for t in xrange(self.max_size):
            if t > 0: tf.get_variable_scope().reuse_variables()
            x = tf.squeeze(source_xs[t], [1])
            x = tf.matmul(x, self.s_proj_W) + self.s_proj_b
            h, s = self.encoder(x, s)
            encoder_hs.append(h)
    encoder_hs = tf.pack(encoder_hs)
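
    # Unroll the decoder: during training every step is teacher-forced with
    # the gold target token; at test time only step 0 reads from target_xs.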
    s = self.decoder.zero_state(self.batch_size, tf.float32)
    logits = []
    probs = []
    with tf.variable_scope("decoder"):
        for t in xrange(self.max_size):
            if t > 0: tf.get_variable_scope().reuse_variables()
            if not self.is_test or t == 0:
                x = tf.squeeze(target_xs[t], [1])
                x = tf.matmul(x, self.t_proj_W) + self.t_proj_b
            h_t, s = self.decoder(x, s)
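            # self.attention (defined elsewhere in the class) scores h_t
            # against all encoder states and returns the attentional vector
            # h~_t, in the style of Luong et al.'s global attention.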
            h_tld = self.attention(h_t, encoder_hs)
            oemb = tf.matmul(h_tld, self.proj_W) + self.proj_b
            logit = tf.matmul(oemb, self.proj_Wo) + self.proj_bo
            prob = tf.nn.softmax(logit)
            logits.append(logit)
            probs.append(prob)
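            # At test time, feed the greedy argmax prediction back in as the
            # next decoder input.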
            if self.is_test:
                x = tf.cast(tf.argmax(prob, 1), tf.int32)
                x = tf.nn.embedding_lookup(self.t_emb, x)
                # Project the fed-back embedding the same way as the
                # teacher-forced inputs so its size matches the cell input.
                x = tf.matmul(x, self.t_proj_W) + self.t_proj_b
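
    # Shift by one step: the logits at step t are scored against target token
    # t+1, with padded positions masked out via sequence_mask.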
    logits = logits[:-1]
    targets = tf.split(1, self.max_size, self.target)[1:]
    weights = tf.unpack(tf.sequence_mask(self.target_len - 1, self.max_size - 1,
                                         dtype=tf.float32), None, 1)
    self.loss = tf.nn.seq2seq.sequence_loss(logits, targets, weights)
    self.probs = tf.transpose(tf.pack(probs), [1, 0, 2])
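
    # Plain SGD with gradients clipped to norm 5; optimize_loss also records
    # learning-rate, loss, and gradient-norm summaries.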
    self.optim = tf.contrib.layers.optimize_loss(self.loss, None,
                                                 self.lr_init, "SGD", clip_gradients=5.,
                                                 summaries=["learning_rate", "loss", "gradient_norm"])

    tf.initialize_all_variables().run()
    self.saver = tf.train.Saver()