def gru_cell_decoder(self, Xt, h_t_minus_1, context_vector):
    """
    Single step of the word-level GRU decoder, conditioned on an attention context.
    :param Xt: [batch_size, embed_size]
    :param h_t_minus_1: previous decoder hidden state, [batch_size, hidden_size]
    :param context_vector: [batch_size, embed_size]. The result of attention (weighted sum of encoder states for the current decoding step).
    :return: h_t, h_t. The new hidden state, [batch_size, hidden_size].
    """
    # 1. update gate: decides how much past information is kept and how much new information is added.
    z_t = tf.nn.sigmoid(tf.matmul(Xt, self.W_z_decoder) + tf.matmul(h_t_minus_1, self.U_z_decoder)
                        + tf.matmul(context_vector, self.C_z_decoder) + self.b_z_decoder)  # z_t: [batch_size, hidden_size]
    # 2. reset gate: controls how much the past state contributes to the candidate state.
    r_t = tf.nn.sigmoid(tf.matmul(Xt, self.W_r_decoder) + tf.matmul(h_t_minus_1, self.U_r_decoder)
                        + tf.matmul(context_vector, self.C_r_decoder) + self.b_r_decoder)  # r_t: [batch_size, hidden_size]
    # 3. candidate state h_t~: the reset gate scales how much the previous hidden state feeds the candidate.
    h_t_candidate = tf.nn.tanh(tf.matmul(Xt, self.W_h_decoder) + r_t * tf.matmul(h_t_minus_1, self.U_h_decoder)
                               + tf.matmul(context_vector, self.C_h_decoder) + self.b_h_decoder)  # h_t_candidate: [batch_size, hidden_size]
    # 4. new state: a linear combination of the previous hidden state and the candidate state h_t~.
    h_t = (1 - z_t) * h_t_minus_1 + z_t * h_t_candidate  # h_t: [batch_size, hidden_size]
    return h_t, h_t  # the cell's output at this step equals its new hidden state
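# A minimal sketch (not from this file) of how gru_cell_decoder might be unrolled
# over the decoding steps. The names decode_with_attention_sketch, attention_context,
# decoder_inputs_embedded, encoder_outputs and initial_decoder_state are hypothetical,
# used only to illustrate the calling convention assumed above.
def decode_with_attention_sketch(self, decoder_inputs_embedded, encoder_outputs, initial_decoder_state, decoder_length):
    h_t = initial_decoder_state                               # [batch_size, hidden_size]
    outputs = []
    for t in range(decoder_length):
        Xt = decoder_inputs_embedded[:, t, :]                 # [batch_size, embed_size]
        # hypothetical attention step: weighted sum of encoder states given the previous decoder state
        context_vector = self.attention_context(h_t, encoder_outputs)
        h_t, output = self.gru_cell_decoder(Xt, h_t, context_vector)
        outputs.append(output)
    return tf.stack(outputs, axis=1)                          # [batch_size, decoder_length, hidden_size]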
# forward gru for first level: word levels
Source file: a1_seq2seq_attention_model.py