# LSTMLayer.py source file - Python code snippet

def __init__(self, shape, name):
    """Create the shared parameters of an attention-equipped LSTM layer.

    Args:
        shape: Indexable with at least three entries. Judging by how each
            entry is used below, ``shape[0]`` is the input size,
            ``shape[1]`` the LSTM size and ``shape[2]`` the context size.
        name: Prefix for the ``name`` given to every shared variable
            (e.g. ``name + "_W"``).

    NOTE(review): the exact matrix shapes and dtypes of the weights depend
    on ``utils.norm_weight`` / ``utils.ortho_weight``, which are defined
    outside this block -- confirm there.
    """
    dim_in, dim_lstm, dim_ctx = shape[0], shape[1], shape[2]
    n_gates = 4  # number of blocks stacked side by side in W, U and b

    def shared(value, suffix):
        # Every parameter is registered under "<name><suffix>".
        return theano.shared(value, name=name + suffix)

    # input to LSTM: the per-gate matrices are stacked along axis 1 for
    # compactness, so a single dot product yields the activations of every
    # gate, which are then sliced apart
    self.W = shared(
        numpy.concatenate(
            [utils.norm_weight(dim_in, dim_lstm) for _ in range(n_gates)],
            axis=1),
        "_W")

    # LSTM to LSTM, stacked the same way
    self.U = shared(
        numpy.concatenate(
            [utils.ortho_weight(dim_lstm) for _ in range(n_gates)],
            axis=1),
        "_U")

    # bias to LSTM (one float32 zero per stacked gate unit)
    self.b = shared(numpy.zeros((n_gates * dim_lstm,), dtype='float32'), "_b")

    # context to LSTM
    self.Wc = shared(utils.norm_weight(dim_ctx, n_gates * dim_lstm), "_Wc")

    # attention: context -> hidden
    self.Wc_att = shared(utils.norm_weight(dim_ctx, ortho=False), "_Wc_att")

    # attention: LSTM -> hidden
    self.Wd_att = shared(utils.norm_weight(dim_lstm, dim_ctx), "_Wd_att")

    # attention: hidden bias
    self.b_att = shared(numpy.zeros((dim_ctx,), dtype='float32'), "_b_att")

    # optional "deep" attention
    self.W_att_1 = shared(utils.ortho_weight(dim_ctx), "_W_att_1")
    self.b_att_1 = shared(numpy.zeros((dim_ctx,), dtype='float32'), "_b_att_1")

    # attention: final weight and bias, with output dimension 1
    self.U_att = shared(utils.norm_weight(dim_ctx, 1), "_U_att")
    self.c_att = shared(numpy.zeros((1,), dtype='float32'), "_c_att")

    # attention: selector
    self.W_sel = shared(utils.norm_weight(dim_lstm, 1), "_W_sel")
    self.b_sel = shared(numpy.float32(0.), "_b_sel")