def param_init_gru(options, param, prefix='gru', nin=None, dim=None):
    # stacked reset/update gate weights: input-to-hidden, hidden-to-hidden, shared bias
    param[prefix + '_W'] = numpy.concatenate(
        [uniform_weight(nin, dim), uniform_weight(nin, dim)], axis=1)
    param[prefix + '_U'] = numpy.concatenate(
        [ortho_weight(dim), ortho_weight(dim)], axis=1)
    param[prefix + '_b'] = zero_vector(2 * dim)
    # candidate hidden-state path
    param[prefix + '_Wx'] = uniform_weight(nin, dim)
    param[prefix + '_Ux'] = ortho_weight(dim)
    param[prefix + '_bx'] = zero_vector(dim)
    return param
Python ortho_weight() usage examples (source snippets)
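The snippets on this page assume a handful of initialization helpers (ortho_weight, norm_weight, uniform_weight, zero_vector/zero_bias, and the name-joining _p) whose definitions the source pages do not show. A minimal sketch of typical definitions, following dl4mt-style conventions; the exact scales vary by repository:

import numpy

def _p(prefix, name):
    # join a layer prefix and a parameter name, e.g. _p('gru', 'W') -> 'gru_W'
    return '%s_%s' % (prefix, name)

def ortho_weight(ndim):
    # square orthogonal matrix via SVD of a random Gaussian matrix
    W = numpy.random.randn(ndim, ndim)
    u, s, v = numpy.linalg.svd(W)
    return u.astype('float32')

def norm_weight(nin, nout=None, scale=0.01, ortho=True):
    # scaled Gaussian init; falls back to ortho_weight for square matrices
    if nout is None:
        nout = nin
    if nout == nin and ortho:
        return ortho_weight(nin)
    return (scale * numpy.random.randn(nin, nout)).astype('float32')

def uniform_weight(nin, nout, scale=0.1):
    # uniform init in [-scale, scale]; the scale here is an assumed value
    return numpy.random.uniform(-scale, scale, (nin, nout)).astype('float32')

def zero_bias(dim):
    # all-zero bias vector (zero_vector in some snippets is the same thing)
    return numpy.zeros((dim,)).astype('float32')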
def param_init_gru(options, params, prefix='gru', nin=None, dim=None):
    """
    Gated Recurrent Unit (GRU)
    """
    if nin is None:
        nin = options['dim_proj']
    if dim is None:
        dim = options['dim_proj']
    W = numpy.concatenate([norm_weight(nin, dim),
                           norm_weight(nin, dim)], axis=1)
    params[_p(prefix, 'W')] = W
    params[_p(prefix, 'b')] = numpy.zeros((2 * dim,)).astype('float32')
    U = numpy.concatenate([ortho_weight(dim),
                           ortho_weight(dim)], axis=1)
    params[_p(prefix, 'U')] = U
    Wx = norm_weight(nin, dim)
    params[_p(prefix, 'Wx')] = Wx
    Ux = ortho_weight(dim)
    params[_p(prefix, 'Ux')] = Ux
    params[_p(prefix, 'bx')] = numpy.zeros((dim,)).astype('float32')
    return params
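Stacking the two gate matrices along axis 1 lets the recurrence compute both gates with a single dot product and slice the result apart. A sketch of the matching step function, assuming the usual dl4mt gate order (reset first, update second); the actual gru_layer is not part of this excerpt:

import theano.tensor as tensor

def _slice(x, n, dim):
    # pull gate n out of a stacked pre-activation of width k*dim
    if x.ndim == 3:
        return x[:, :, n * dim:(n + 1) * dim]
    return x[:, n * dim:(n + 1) * dim]

def gru_step(x_, xx_, h_, U, Ux):
    # x_  = dot(x, W) + b   (both gates, precomputed outside scan)
    # xx_ = dot(x, Wx) + bx (candidate path, precomputed outside scan)
    dim = Ux.shape[1]
    preact = tensor.nnet.sigmoid(tensor.dot(h_, U) + x_)
    r = _slice(preact, 0, dim)                           # reset gate
    u = _slice(preact, 1, dim)                           # update gate
    h_tilde = tensor.tanh(tensor.dot(h_, Ux) * r + xx_)  # candidate state
    return u * h_ + (1. - u) * h_tilde                   # convex combination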
def param_init_encoder(options, params, prefix='lstm_encoder'):
    n_x = options['n_x']
    n_h = options['n_h']
    W = np.concatenate([uniform_weight(n_x, n_h),
                        uniform_weight(n_x, n_h),
                        uniform_weight(n_x, n_h),
                        uniform_weight(n_x, n_h)], axis=1)
    params[_p(prefix, 'W')] = W
    U = np.concatenate([ortho_weight(n_h),
                        ortho_weight(n_h),
                        ortho_weight(n_h),
                        ortho_weight(n_h)], axis=1)
    params[_p(prefix, 'U')] = U
    params[_p(prefix, 'b')] = zero_bias(4 * n_h)
    # It is observed that setting a high initial forget-gate bias for LSTMs
    # can give slightly better results (Le et al., 2015). Hence, the initial
    # forget-gate bias is set to 3.
    params[_p(prefix, 'b')][n_h:2 * n_h] = 3 * np.ones((n_h,)).astype(theano.config.floatX)
    return params
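The slice n_h:2*n_h targets the forget gate, which presumes the gate columns of W and U are stacked in the order input, forget, output, cell. A quick numpy check of the layout this produces:

import numpy as np

n_h = 4
b = np.zeros((4 * n_h,), dtype='float32')
b[n_h:2 * n_h] = 3.  # forget-gate block, assuming gate order [i, f, o, c]
# rows below: input, forget, output, cell biases
print(b.reshape(4, n_h))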
def param_init_encoder(options, params, prefix='gru_encoder'):
    n_x = options['n_x']
    n_h = options['n_h']
    W = np.concatenate([uniform_weight(n_x, n_h),
                        uniform_weight(n_x, n_h)], axis=1)
    params[_p(prefix, 'W')] = W
    U = np.concatenate([ortho_weight(n_h),
                        ortho_weight(n_h)], axis=1)
    params[_p(prefix, 'U')] = U
    params[_p(prefix, 'b')] = zero_bias(2 * n_h)
    Wx = uniform_weight(n_x, n_h)
    params[_p(prefix, 'Wx')] = Wx
    Ux = ortho_weight(n_h)
    params[_p(prefix, 'Ux')] = Ux
    params[_p(prefix, 'bx')] = zero_bias(n_h)
    return params
def param_init_lstm(self, params, nin, dim, prefix='lstm'):
    assert prefix is not None
    # Stack the weight matrices for faster dot prods
    W = numpy.concatenate([norm_weight(nin, dim),
                           norm_weight(nin, dim),
                           norm_weight(nin, dim),
                           norm_weight(nin, dim)], axis=1)
    params[_p(prefix, 'W')] = W
    U = numpy.concatenate([ortho_weight(dim),
                           ortho_weight(dim),
                           ortho_weight(dim),
                           ortho_weight(dim)], axis=1)
    params[_p(prefix, 'U')] = U
    params[_p(prefix, 'b')] = numpy.zeros((4 * dim,)).astype('float32')
    return params
# This function implements the lstm fprop
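The fprop that comment introduces was cut off in this excerpt. For reference, a sketch of a standard Theano LSTM step consistent with the 4-way stacked W/U/b above, reusing the _slice helper sketched earlier (gate order [i, f, o, c] is an assumption):

import theano.tensor as tensor

def lstm_step(x_, h_, c_, U, dim):
    # x_ = dot(x, W) + b, precomputed outside scan
    preact = tensor.dot(h_, U) + x_
    i = tensor.nnet.sigmoid(_slice(preact, 0, dim))       # input gate
    f = tensor.nnet.sigmoid(_slice(preact, 1, dim))       # forget gate
    o = tensor.nnet.sigmoid(_slice(preact, 2, dim))       # output gate
    c = f * c_ + i * tensor.tanh(_slice(preact, 3, dim))  # cell update
    h = o * tensor.tanh(c)                                # new hidden state
    return h, c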
def param_init_encoder(options, params, prefix='encoder'):
    n_x = options['n_x']
    n_h = options['n_h']
    W = np.concatenate([uniform_weight(n_x, n_h),
                        uniform_weight(n_x, n_h),
                        uniform_weight(n_x, n_h),
                        uniform_weight(n_x, n_h)], axis=1)
    params[_p(prefix, 'W')] = W
    U = np.concatenate([ortho_weight(n_h),
                        ortho_weight(n_h),
                        ortho_weight(n_h),
                        ortho_weight(n_h)], axis=1)
    params[_p(prefix, 'U')] = U
    params[_p(prefix, 'b')] = zero_bias(4 * n_h)
    # high initial forget-gate bias, as in the lstm_encoder variant above
    params[_p(prefix, 'b')][n_h:2 * n_h] = 3 * np.ones((n_h,)).astype(theano.config.floatX)
    return params
def param_init_decoder(options, params, prefix='decoder_lstm'):
    n_x = options['n_x']
    n_h = options['n_h']
    W = np.concatenate([uniform_weight(n_x, n_h),
                        uniform_weight(n_x, n_h),
                        uniform_weight(n_x, n_h),
                        uniform_weight(n_x, n_h)], axis=1)
    params[_p(prefix, 'W')] = W
    U = np.concatenate([ortho_weight(n_h),
                        ortho_weight(n_h),
                        ortho_weight(n_h),
                        ortho_weight(n_h)], axis=1)
    params[_p(prefix, 'U')] = U
    params[_p(prefix, 'b')] = zero_bias(4 * n_h)
    params[_p(prefix, 'b')][n_h:2 * n_h] = 3 * np.ones((n_h,)).astype(theano.config.floatX)
    return params
def param_init_decoder(options, params, prefix='decoder_gru'):
    n_x = options['n_x']
    n_h = options['n_h']
    W = np.concatenate([uniform_weight(n_x, n_h),
                        uniform_weight(n_x, n_h)], axis=1)
    params[_p(prefix, 'W')] = W
    U = np.concatenate([ortho_weight(n_h),
                        ortho_weight(n_h)], axis=1)
    params[_p(prefix, 'U')] = U
    params[_p(prefix, 'b')] = zero_bias(2 * n_h)
    Wx = uniform_weight(n_x, n_h)
    params[_p(prefix, 'Wx')] = Wx
    Ux = ortho_weight(n_h)
    params[_p(prefix, 'Ux')] = Ux
    params[_p(prefix, 'bx')] = zero_bias(n_h)
    params[_p(prefix, 'b0')] = zero_bias(n_h)
    return params
def param_init_decoder(options, params, prefix='decoder'):
    n_x = options['n_x']
    n_h = options['n_h']
    n_z = options['n_z']
    W = np.concatenate([uniform_weight(n_x, n_h),
                        uniform_weight(n_x, n_h),
                        uniform_weight(n_x, n_h),
                        uniform_weight(n_x, n_h)], axis=1)
    params[_p(prefix, 'W')] = W
    U = np.concatenate([ortho_weight(n_h),
                        ortho_weight(n_h),
                        ortho_weight(n_h),
                        ortho_weight(n_h)], axis=1)
    params[_p(prefix, 'U')] = U
    C = np.concatenate([uniform_weight(n_z, n_h),
                        uniform_weight(n_z, n_h),
                        uniform_weight(n_z, n_h),
                        uniform_weight(n_z, n_h)], axis=1)
    params[_p(prefix, 'C')] = C
    params[_p(prefix, 'b')] = zero_bias(4 * n_h)
    params[_p(prefix, 'b')][n_h:2 * n_h] = 3 * np.ones((n_h,)).astype(theano.config.floatX)
    C0 = uniform_weight(n_z, n_h)
    params[_p(prefix, 'C0')] = C0
    params[_p(prefix, 'b0')] = zero_bias(n_h)
    params[_p(prefix, 'b_y')] = zero_bias(n_x)  # 48
    return params
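Here C feeds the latent code z into all four gates at every step, while C0/b0 are presumably used to set the decoder's initial hidden state, and b_y is the output bias. A sketch of the assumed initial-state computation (symbolic Theano; the actual decoder layer is not shown on this page):

import theano.tensor as tensor

def decoder_init_state(tparams, z, prefix='decoder'):
    # h0 = tanh(z C0 + b0), matching the C0/b0 parameters created above
    return tensor.tanh(tensor.dot(z, tparams[_p(prefix, 'C0')]) +
                       tparams[_p(prefix, 'b0')])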
def param_init_decoder(options, params, prefix='decoder'):
    n_x = options['n_x']
    n_h = options['n_h']
    n_z = options['n_z']
    W = np.concatenate([uniform_weight(n_x, n_h),
                        uniform_weight(n_x, n_h),
                        uniform_weight(n_x, n_h),
                        uniform_weight(n_x, n_h)], axis=1)
    params[_p(prefix, 'W')] = W
    U = np.concatenate([ortho_weight(n_h),
                        ortho_weight(n_h),
                        ortho_weight(n_h),
                        ortho_weight(n_h)], axis=1)
    params[_p(prefix, 'U')] = U
    C = np.concatenate([uniform_weight(n_z, n_h),
                        uniform_weight(n_z, n_h),
                        uniform_weight(n_z, n_h),
                        uniform_weight(n_z, n_h)], axis=1)
    params[_p(prefix, 'C')] = C
    params[_p(prefix, 'b')] = zero_bias(4 * n_h)
    params[_p(prefix, 'b')][n_h:2 * n_h] = 3 * np.ones((n_h,)).astype(theano.config.floatX)
    C0 = uniform_weight(n_z, n_h)
    params[_p(prefix, 'C0')] = C0
    params[_p(prefix, 'b0')] = zero_bias(n_h)
    # params[_p(prefix, 'b_y')] = zero_bias(n_x)  # 48
    return params
def param_init_decoder(options, params, prefix='decoder_lstm'):
    n_x = options['n_x']
    n_h = options['n_h']
    n_z = options['n_z']
    W = np.concatenate([uniform_weight(n_x, n_h),
                        uniform_weight(n_x, n_h),
                        uniform_weight(n_x, n_h),
                        uniform_weight(n_x, n_h)], axis=1)
    params[_p(prefix, 'W')] = W
    U = np.concatenate([ortho_weight(n_h),
                        ortho_weight(n_h),
                        ortho_weight(n_h),
                        ortho_weight(n_h)], axis=1)
    params[_p(prefix, 'U')] = U
    # C = np.concatenate([uniform_weight(n_z, n_h),
    #                     uniform_weight(n_z, n_h),
    #                     uniform_weight(n_z, n_h),
    #                     uniform_weight(n_z, n_h)], axis=1)
    # params[_p(prefix, 'C')] = C
    params[_p(prefix, 'b')] = zero_bias(4 * n_h)
    # params[_p(prefix, 'b')][n_h:2*n_h] = 3*np.ones((n_h,)).astype(theano.config.floatX)
    C0 = uniform_weight(n_z, n_h)
    params[_p(prefix, 'C0')] = C0
    params[_p(prefix, 'b0')] = zero_bias(n_h)
    return params
def __init__(self, shape, name):
    # input to LSTM; as above, we stack the matrices for compactness, do one
    # dot product, and use the slice function below to get the activations
    # for each "gate"
    self.W = theano.shared(numpy.concatenate(
        [utils.norm_weight(shape[0], shape[1]),
         utils.norm_weight(shape[0], shape[1]),
         utils.norm_weight(shape[0], shape[1]),
         utils.norm_weight(shape[0], shape[1])], axis=1), name=name + "_W")
    # LSTM to LSTM
    self.U = theano.shared(numpy.concatenate(
        [utils.ortho_weight(shape[1]),
         utils.ortho_weight(shape[1]),
         utils.ortho_weight(shape[1]),
         utils.ortho_weight(shape[1])], axis=1), name=name + "_U")
    # bias to LSTM
    self.b = theano.shared(numpy.zeros((4 * shape[1],)).astype('float32'), name=name + "_b")
    # context to LSTM
    self.Wc = theano.shared(utils.norm_weight(shape[2], 4 * shape[1]), name=name + "_Wc")
    # attention: context -> hidden
    self.Wc_att = theano.shared(utils.norm_weight(shape[2], ortho=False), name=name + "_Wc_att")
    # attention: LSTM -> hidden
    self.Wd_att = theano.shared(utils.norm_weight(shape[1], shape[2]), name=name + "_Wd_att")
    # attention: hidden bias
    self.b_att = theano.shared(numpy.zeros((shape[2],)).astype('float32'), name=name + "_b_att")
    # optional "deep" attention
    self.W_att_1 = theano.shared(utils.ortho_weight(shape[2]), name=name + "_W_att_1")
    self.b_att_1 = theano.shared(numpy.zeros((shape[2],)).astype('float32'), name=name + "_b_att_1")
    # attention: hidden -> scalar score
    self.U_att = theano.shared(utils.norm_weight(shape[2], 1), name=name + "_U_att")
    self.c_att = theano.shared(numpy.zeros((1,)).astype('float32'), name=name + "_c_att")
    # attention: selector
    self.W_sel = theano.shared(utils.norm_weight(shape[1], 1), name=name + "_W_sel")
    self.b_sel = theano.shared(numpy.float32(0.), name=name + "_b_sel")
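The Wc_att/Wd_att/b_att/U_att/c_att group implements the usual soft-attention scoring: project the context and the hidden state into a common space, squash with tanh, and reduce each location to a scalar score before a softmax. A sketch of the assumed computation for a single sample (names mirror the shared variables above; shapes are illustrative, and the "deep" attention step is omitted):

import theano.tensor as tensor

def attention_weights(layer, ctx, h):
    # ctx: (n_annotations, dimctx) context vectors; h: (dim,) LSTM state
    pctx = tensor.dot(ctx, layer.Wc_att) + layer.b_att     # context -> hidden
    pstate = tensor.dot(h, layer.Wd_att)                   # LSTM -> hidden
    pctx = tensor.tanh(pctx + pstate)                      # broadcast over locations
    e = tensor.dot(pctx, layer.U_att)[:, 0] + layer.c_att  # scalar score per location
    return tensor.nnet.softmax(e[None, :])[0]              # normalized attention weights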
def param_init_lstm_cond(self, options, params,
                         prefix='lstm_cond', nin=None, dim=None, dimctx=None):
    if nin is None:
        nin = options['dim']
    if dim is None:
        dim = options['dim']
    if dimctx is None:
        dimctx = options['dim']
    # input to LSTM
    W = numpy.concatenate([norm_weight(nin, dim),
                           norm_weight(nin, dim),
                           norm_weight(nin, dim),
                           norm_weight(nin, dim)], axis=1)
    params[_p(prefix, 'W')] = W
    # LSTM to LSTM
    U = numpy.concatenate([ortho_weight(dim),
                           ortho_weight(dim),
                           ortho_weight(dim),
                           ortho_weight(dim)], axis=1)
    params[_p(prefix, 'U')] = U
    # bias to LSTM
    params[_p(prefix, 'b')] = numpy.zeros((4 * dim,)).astype('float32')
    # context to LSTM
    # Wc = norm_weight(dimctx, dim * 4)
    # params[_p(prefix, 'Wc')] = Wc
    # attention: context -> hidden
    Wc_att = norm_weight(dimctx, ortho=False)
    params[_p(prefix, 'Wc_att')] = Wc_att
    # attention: LSTM -> hidden
    Wd_att = norm_weight(dim, dimctx)
    params[_p(prefix, 'Wd_att')] = Wd_att
    # attention: hidden bias
    b_att = numpy.zeros((dimctx,)).astype('float32')
    params[_p(prefix, 'b_att')] = b_att
    # attention: hidden -> scalar score
    U_att = norm_weight(dimctx, 1)
    params[_p(prefix, 'U_att')] = U_att
    c_att = numpy.zeros((1,)).astype('float32')
    params[_p(prefix, 'c_tt')] = c_att  # NB: stored under the key 'c_tt', not 'c_att'
    if options['selector']:
        # attention: selector
        W_sel = norm_weight(dim, 1)
        params[_p(prefix, 'W_sel')] = W_sel
        b_sel = numpy.float32(0.)
        params[_p(prefix, 'b_sel')] = b_sel
    return params
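Once one of these initializers has filled a params dict with numpy arrays, Theano codebases typically lift it to shared variables before building the computation graph. A minimal sketch (the helper name init_tparams follows dl4mt naming and is not shown on this page; the nin/dim values in the usage comment are just examples):

from collections import OrderedDict
import theano

def init_tparams(params):
    # wrap each numpy array in a theano shared variable under the same key
    tparams = OrderedDict()
    for name, value in params.items():
        tparams[name] = theano.shared(value, name=name)
    return tparams

# usage:
# params = param_init_gru(options, OrderedDict(), prefix='encoder', nin=620, dim=1000)
# tparams = init_tparams(params)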