def __init__(self, shape, name):
    """Create the shared parameters of an LSTM layer with attention over a context.

    Every parameter is stored on ``self`` as a ``theano.shared`` variable
    whose ``name`` is ``name`` plus a fixed suffix (``"_W"``, ``"_U"``, ...).

    Args:
        shape: Indexable with at least three entries. ``shape[0]`` is used
            as the input size, ``shape[1]`` as the LSTM size and
            ``shape[2]`` as the context size.
        name: Prefix for the names of the shared variables created here.

    Raises:
        IndexError: If ``shape`` has fewer than three entries.

    NOTE(review): the shapes/dtypes returned by ``utils.norm_weight`` and
    ``utils.ortho_weight`` are not visible from this block -- presumably
    ``(nin, nout)`` and ``(n, n)`` arrays; confirm against ``utils``.
    """
    n_in, dim, ctx_dim = shape[0], shape[1], shape[2]
    # One sub-matrix per LSTM "gate"; the four are stacked side by side.
    n_gates = 4

    # NOTE: the statements below are kept in their original order so that
    # any random-number generator used by the ``utils`` initialisers is
    # consumed in the same sequence as before.

    # Input to LSTM: we stack the matrices for compactness, do one dot
    # product, and use the slice function below to get the activations
    # for each "gate".
    self.W = theano.shared(
        numpy.concatenate(
            [utils.norm_weight(n_in, dim) for _ in range(n_gates)],
            axis=1),
        name=name + "_W")
    # LSTM to LSTM
    self.U = theano.shared(
        numpy.concatenate(
            [utils.ortho_weight(dim) for _ in range(n_gates)],
            axis=1),
        name=name + "_U")
    # Bias to LSTM (a single float32 cast is sufficient).
    self.b = theano.shared(
        numpy.zeros((n_gates * dim,)).astype('float32'),
        name=name + "_b")
    # Context to LSTM
    self.Wc = theano.shared(
        utils.norm_weight(ctx_dim, n_gates * dim),
        name=name + "_Wc")

    # Attention: context -> hidden
    self.Wc_att = theano.shared(
        utils.norm_weight(ctx_dim, ortho=False),
        name=name + "_Wc_att")
    # Attention: LSTM -> hidden
    self.Wd_att = theano.shared(
        utils.norm_weight(dim, ctx_dim),
        name=name + "_Wd_att")
    # Attention: hidden bias
    self.b_att = theano.shared(
        numpy.zeros((ctx_dim,)).astype('float32'),
        name=name + "_b_att")

    # Optional "deep" attention
    self.W_att_1 = theano.shared(
        utils.ortho_weight(ctx_dim),
        name=name + "_W_att_1")
    self.b_att_1 = theano.shared(
        numpy.zeros((ctx_dim,)).astype('float32'),
        name=name + "_b_att_1")

    # Attention: weight and bias applied to the hidden attention values
    self.U_att = theano.shared(
        utils.norm_weight(ctx_dim, 1),
        name=name + "_U_att")
    self.c_att = theano.shared(
        numpy.zeros((1,)).astype('float32'),
        name=name + "_c_att")

    # Attention: selector
    self.W_sel = theano.shared(
        utils.norm_weight(dim, 1),
        name=name + "_W_sel")
    self.b_sel = theano.shared(numpy.float32(0.), name=name + "_b_sel")
评论列表
文章目录