Example source code for the Python function _p()
def param_init_gru(options, params, prefix='gru', nin=None, dim=None):
"""
Gated Recurrent Unit (GRU)
"""
    if nin is None:
        nin = options['dim_proj']
    if dim is None:
        dim = options['dim_proj']
W = numpy.concatenate([norm_weight(nin,dim),
norm_weight(nin,dim)], axis=1)
params[_p(prefix,'W')] = W
params[_p(prefix,'b')] = numpy.zeros((2 * dim,)).astype('float32')
U = numpy.concatenate([ortho_weight(dim),
ortho_weight(dim)], axis=1)
params[_p(prefix,'U')] = U
Wx = norm_weight(nin, dim)
params[_p(prefix,'Wx')] = Wx
Ux = ortho_weight(dim)
params[_p(prefix,'Ux')] = Ux
params[_p(prefix,'bx')] = numpy.zeros((dim,)).astype('float32')
return params
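The helpers used by these snippets (_p, norm_weight, ortho_weight, uniform_weight, zero_bias) are defined elsewhere in the original projects. As a rough guide, here is a minimal sketch of typical definitions (the exact scale, dtype handling, and orthogonal fallback are assumptions), followed by the parameter shapes param_init_gru produces.

# Minimal sketch of the assumed helpers; the real definitions live in the
# original project and may differ in details such as the scale.
import numpy

def _p(pp, name):
    # join a layer prefix and a parameter name, e.g. _p('gru', 'W') -> 'gru_W'
    return '%s_%s' % (pp, name)

def ortho_weight(ndim):
    # square orthogonal matrix from the SVD of a random Gaussian matrix
    W = numpy.random.randn(ndim, ndim)
    u, _, _ = numpy.linalg.svd(W)
    return u.astype('float32')

def norm_weight(nin, nout, scale=0.01, ortho=True):
    # scaled Gaussian initialization (orthogonal when square, in the usual variant)
    if nin == nout and ortho:
        return ortho_weight(nin)
    return (scale * numpy.random.randn(nin, nout)).astype('float32')

# Shape check: with dim_proj = 4 the GRU gets stacked reset/update weights of
# width 2*dim and separate candidate-state weights of width dim.
params = param_init_gru({'dim_proj': 4}, {})
for name, value in params.items():
    print(name, value.shape)
# gru_W (4, 8), gru_b (8,), gru_U (4, 8), gru_Wx (4, 4), gru_Ux (4, 4), gru_bx (4,)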
def param_init_encoder(options, params, prefix='lstm_encoder'):
n_x = options['n_x']
n_h = options['n_h']
W = np.concatenate([uniform_weight(n_x,n_h),
uniform_weight(n_x,n_h),
uniform_weight(n_x,n_h),
uniform_weight(n_x,n_h)], axis=1)
params[_p(prefix, 'W')] = W
U = np.concatenate([ortho_weight(n_h),
ortho_weight(n_h),
ortho_weight(n_h),
ortho_weight(n_h)], axis=1)
params[_p(prefix, 'U')] = U
params[_p(prefix,'b')] = zero_bias(4*n_h)
    # It is observed that setting a high initial forget gate bias for LSTMs can
    # give slightly better results (Le et al., 2015). Hence, the initial forget
    # gate bias is set to 3.
params[_p(prefix, 'b')][n_h:2*n_h] = 3*np.ones((n_h,)).astype(theano.config.floatX)
return params
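A quick NumPy check of the bias layout set above (a sketch; zero_bias is assumed to return a float32 zero vector): with the four gate blocks stacked along one axis, the [n_h:2*n_h] slice is the second block, i.e. the forget gate under this stacking order.

import numpy as np
n_h = 5
b = np.zeros((4 * n_h,), dtype='float32')   # what zero_bias(4*n_h) is assumed to produce
b[n_h:2 * n_h] = 3.                         # the assignment made in param_init_encoder
print(b.reshape(4, n_h))                    # only the second block (forget gate) is 3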
def param_init_encoder(options, params, prefix='gru_encoder'):
n_x = options['n_x']
n_h = options['n_h']
W = np.concatenate([uniform_weight(n_x,n_h),
uniform_weight(n_x,n_h)], axis=1)
params[_p(prefix,'W')] = W
U = np.concatenate([ortho_weight(n_h),
ortho_weight(n_h)], axis=1)
params[_p(prefix,'U')] = U
params[_p(prefix,'b')] = zero_bias(2*n_h)
Wx = uniform_weight(n_x, n_h)
params[_p(prefix,'Wx')] = Wx
Ux = ortho_weight(n_h)
params[_p(prefix,'Ux')] = Ux
params[_p(prefix,'bx')] = zero_bias(n_h)
return params
def init_params(options,W):
params = OrderedDict()
# W is initialized by the pretrained word embedding
params['Wemb'] = W.astype(config.floatX)
# otherwise, W will be initialized randomly
# n_words = options['n_words']
# n_x = options['n_x']
# params['Wemb'] = uniform_weight(n_words,n_x)
length = len(options['filter_shapes'])
for idx in range(length):
params = param_init_encoder(options['filter_shapes'][idx],params,prefix=_p('cnn_encoder',idx))
n_h = options['feature_maps'] * length
params['Wy'] = uniform_weight(n_h,options['n_y'])
params['by'] = zero_bias(options['n_y'])
return params
def param_init_lstm(self, params, nin, dim, prefix='lstm'):
assert prefix is not None
    # Stack the weight matrices for faster dot products
W = numpy.concatenate([norm_weight(nin, dim),
norm_weight(nin, dim),
norm_weight(nin, dim),
norm_weight(nin, dim)], axis=1)
params[_p(prefix, 'W')] = W
U = numpy.concatenate([ortho_weight(dim),
ortho_weight(dim),
ortho_weight(dim),
ortho_weight(dim)], axis=1)
params[_p(prefix, 'U')] = U
params[_p(prefix, 'b')] = numpy.zeros((4 * dim,)).astype('float32')
return params
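The stacking mentioned in the comment is the usual trick of fusing the four per-gate projections into a single matrix multiply; a small NumPy check of the equivalence (a sketch, independent of the Theano graph):

import numpy as np
nin, dim, n = 3, 4, 5
Ws = [np.random.randn(nin, dim) for _ in range(4)]   # four per-gate weight matrices
W = np.concatenate(Ws, axis=1)                        # stacked, as in param_init_lstm
x = np.random.randn(n, nin)
stacked = x.dot(W)                                    # one matmul for all gates
for i in range(4):
    assert np.allclose(stacked[:, i * dim:(i + 1) * dim], x.dot(Ws[i]))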
# This function implements the lstm fprop
def param_init_mlp_layer(input_shape, pred_shape, params, prefix='mlp_layer'):
""" input_shape: (num of hiddens, number of input features)
pred_shape: (num of labels, number of hiddens)
"""
W1 = np.asarray(rng.uniform(low=-0.01,high=0.01,size=input_shape),dtype=theano.config.floatX)
    b1 = np.ones((input_shape[0],), dtype=theano.config.floatX)*0.01 # initialize to a small positive value (0.01) rather than 0
V1 = np.asarray(rng.uniform(low=-0.01,high=0.01,size=pred_shape),dtype=theano.config.floatX) # 2*200
    c1 = np.ones((pred_shape[0],), dtype=theano.config.floatX)*0.01 # initialize to a small positive value (0.01)
params[_p(prefix,'W1')] = W1
params[_p(prefix,'b1')] = b1
params[_p(prefix,'V1')] = V1
params[_p(prefix,'c1')] = c1
return params
def mlp_layer_softmax(tparams, layer1_input, prefix='mlp_layer'):
""" layer1_input: n_sample * n_feature 64*20
input_shape: (num of hiddens, number of input features) 200*20
pred_shape: (num of labels, number of hiddens) 2*200
y_recon : n_label *n_sample 2*64
"""
hidden_2_out = tensor.nnet.sigmoid(tensor.dot(layer1_input, tparams[_p(prefix,'W1')].T) + tparams[_p(prefix,'b1')] ) # 64*200
y_recons = tensor.dot(hidden_2_out, tparams[_p(prefix,'V1')].T) + tparams[_p(prefix,'c1')]
#y_recons = tensor.tanh(y_recons) * 10 # avoid numerical issues/label smoothing
#y_recons = tensor.nnet.softmax(y_recons) # 64*2
max_w = tensor.max(y_recons, axis = 1, keepdims=True)
e0 = tensor.exp(y_recons - max_w)
y_recons = e0 / tensor.sum(e0, axis = 1, keepdims=True)
return y_recons
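The max subtraction above is the standard numerically stable softmax; the same computation as a NumPy reference (a sketch, outside the Theano graph):

import numpy as np

def softmax_rows(y):
    m = y.max(axis=1, keepdims=True)        # subtract the row-wise max ...
    e = np.exp(y - m)                       # ... so exp never overflows
    return e / e.sum(axis=1, keepdims=True)

print(softmax_rows(np.array([[1000., 1001.], [1., 2.]])))   # rows sum to 1, no inf/nan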
def param_init_encoder(options, params, prefix='encoder'):
n_x = options['n_x']
n_h = options['n_h']
W = np.concatenate([uniform_weight(n_x,n_h),
uniform_weight(n_x,n_h),
uniform_weight(n_x,n_h),
uniform_weight(n_x,n_h)], axis=1)
params[_p(prefix, 'W')] = W
U = np.concatenate([ortho_weight(n_h),
ortho_weight(n_h),
ortho_weight(n_h),
ortho_weight(n_h)], axis=1)
params[_p(prefix, 'U')] = U
params[_p(prefix,'b')] = zero_bias(4*n_h)
params[_p(prefix, 'b')][n_h:2*n_h] = 3*np.ones((n_h,)).astype(theano.config.floatX)
return params
def encoder(tparams, layer0_input, filter_shape, pool_size, options, prefix='cnn_d'):
""" filter_shape: (number of filters, num input feature maps, filter height,
filter width)
image_shape: (batch_size, num input feature maps, image height, image width)
"""
conv_out = conv.conv2d(input=layer0_input, filters=tparams[_p(prefix,'W')], filter_shape=filter_shape)
# conv_out_tanh = tensor.tanh(conv_out + tparams[_p(prefix,'b')].dimshuffle('x', 0, 'x', 'x'))
# output = downsample.max_pool_2d(input=conv_out_tanh, ds=pool_size, ignore_border=False)
if options['cnn_activation'] == 'tanh':
conv_out_tanh = tensor.tanh(conv_out + tparams[_p(prefix,'b')].dimshuffle('x', 0, 'x', 'x'))
output = downsample.max_pool_2d(input=conv_out_tanh, ds=pool_size, ignore_border=False) # the ignore border is very important
elif options['cnn_activation'] == 'linear':
conv_out2 = conv_out + tparams[_p(prefix,'b')].dimshuffle('x', 0, 'x', 'x')
output = downsample.max_pool_2d(input=conv_out2, ds=pool_size, ignore_border=False) # the ignore border is very important
    else:
        raise ValueError('Wrong specification of activation function in CNN')
return output.flatten(2)
def param_init_encoder(options, params, prefix='lstm_encoder'):
n_x = options['n_x']
n_h = options['n_h']
W = np.concatenate([uniform_weight(n_x,n_h),
uniform_weight(n_x,n_h),
uniform_weight(n_x,n_h),
uniform_weight(n_x,n_h)], axis=1)
params[_p(prefix, 'W')] = W
U = np.concatenate([ortho_weight(n_h),
ortho_weight(n_h),
ortho_weight(n_h),
ortho_weight(n_h)], axis=1)
params[_p(prefix, 'U')] = U
params[_p(prefix,'b')] = zero_bias(4*n_h)
# It is observed that setting a high initial forget gate bias for LSTMs can
    # give slightly better results (Le et al., 2015). Hence, the initial forget
# gate bias is set to 3.
params[_p(prefix, 'b')][n_h:2*n_h] = 3*np.ones((n_h,)).astype(theano.config.floatX)
return params
def param_init_decoder(options, params, prefix='decoder_lstm'):
n_x = options['n_x']
n_h = options['n_h']
W = np.concatenate([uniform_weight(n_x,n_h),
uniform_weight(n_x,n_h),
uniform_weight(n_x,n_h),
uniform_weight(n_x,n_h)], axis=1)
params[_p(prefix, 'W')] = W
U = np.concatenate([ortho_weight(n_h),
ortho_weight(n_h),
ortho_weight(n_h),
ortho_weight(n_h)], axis=1)
params[_p(prefix, 'U')] = U
params[_p(prefix,'b')] = zero_bias(4*n_h)
params[_p(prefix, 'b')][n_h:2*n_h] = 3*np.ones((n_h,)).astype(theano.config.floatX)
return params
def param_init_decoder(options, params, prefix='decoder_gru'):
n_x = options['n_x']
n_h = options['n_h']
W = np.concatenate([uniform_weight(n_x,n_h),
uniform_weight(n_x,n_h)], axis=1)
params[_p(prefix,'W')] = W
U = np.concatenate([ortho_weight(n_h),
ortho_weight(n_h)], axis=1)
params[_p(prefix,'U')] = U
params[_p(prefix,'b')] = zero_bias(2*n_h)
Wx = uniform_weight(n_x, n_h)
params[_p(prefix,'Wx')] = Wx
Ux = ortho_weight(n_h)
params[_p(prefix,'Ux')] = Ux
params[_p(prefix,'bx')] = zero_bias(n_h)
params[_p(prefix,'b0')] = zero_bias(n_h)
return params
def param_init_fflayer(options, params, prefix='ff', nin=None, nout=None, ortho=True):
"""
Affine transformation + point-wise nonlinearity
"""
    if nin is None:
        nin = options['dim_proj']
    if nout is None:
        nout = options['dim_proj']
params[_p(prefix,'W')] = norm_weight(nin, nout, ortho=ortho)
params[_p(prefix,'b')] = numpy.zeros((nout,)).astype('float32')
return params
def fflayer(tparams, state_below, options, prefix='rconv', activ='lambda x: tensor.tanh(x)', **kwargs):
"""
Feedforward pass
"""
return eval(activ)(tensor.dot(state_below, tparams[_p(prefix,'W')])+tparams[_p(prefix,'b')])
# GRU layer
def param_init_fflayer(options, params, prefix='ff', nin=None, nout=None, ortho=True):
"""
Affine transformation + point-wise nonlinearity
"""
    if nin is None:
        nin = options['dim_proj']
    if nout is None:
        nout = options['dim_proj']
params[_p(prefix,'W')] = norm_weight(nin, nout, ortho=ortho)
params[_p(prefix,'b')] = numpy.zeros((nout,)).astype('float32')
return params
def param_init_encoder(filter_shape, params, prefix='cnn_encoder'):
""" filter_shape: (number of filters, num input feature maps, filter height,
filter width)
image_shape: (batch_size, num input feature maps, image height, image width)
"""
W = np.asarray(rng.uniform(low=-0.01,high=0.01,size=filter_shape),dtype=theano.config.floatX)
b = np.zeros((filter_shape[0],), dtype=theano.config.floatX)
params[_p(prefix,'W')] = W
params[_p(prefix,'b')] = b
return params
def encoder(tparams, layer0_input, filter_shape, pool_size,
prefix='cnn_encoder'):
""" filter_shape: (number of filters, num input feature maps, filter height,
filter width)
image_shape: (batch_size, num input feature maps, image height, image width)
"""
conv_out = conv.conv2d(input=layer0_input, filters=tparams[_p(prefix,'W')],
filter_shape=filter_shape)
conv_out_tanh = tensor.tanh(conv_out + tparams[_p(prefix,'b')].dimshuffle('x', 0, 'x', 'x'))
output = pool.pool_2d(input=conv_out_tanh, ds=pool_size, ignore_border=True)
return output.flatten(2)
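For orientation, the shapes flowing through this encoder under the usual sentence-CNN setup, where the filter width equals the embedding dimension and pool_size spans the whole feature map; the concrete numbers below are illustrative only:

# Illustrative shapes only; the actual sizes come from options/filter_shapes.
batch, emb_dim, sent_len = 64, 300, 40
n_filters, filter_h = 100, 3
# input      : (batch, 1, sent_len, emb_dim)
# conv       : (batch, n_filters, sent_len - filter_h + 1, 1)   valid convolution
# pool       : (batch, n_filters, 1, 1)    assuming pool_size = (sent_len - filter_h + 1, 1)
# flatten(2) : (batch, n_filters)          one feature per filter per sentence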
def param_init_fflayer(self, options, params, prefix='ff', nin=None, nout=None):
        if nin is None:
            nin = options['dim_proj']
        if nout is None:
            nout = options['dim_proj']
params[_p(prefix, 'W')] = norm_weight(nin, nout, scale=0.01)
params[_p(prefix, 'b')] = numpy.zeros((nout,)).astype('float32')
return params
def fflayer(self, tparams, state_below, options,
prefix='rconv', activ='lambda x: tensor.tanh(x)', **kwargs):
return eval(activ)(tensor.dot(state_below, tparams[_p(prefix, 'W')]) + tparams[
_p(prefix, 'b')])
# LSTM layer
def mlp_layer_tanh(tparams, layer1_input, prefix='mlp_layer'):
""" layer1_input: n_sample * n_feature 64*20
input_shape: (num of hiddens, number of input features) 200*20
pred_shape: (num of labels, number of hiddens) 2*200
y_recon : n_label *n_sample 2*64
"""
hidden_2_out = tensor.nnet.sigmoid(tensor.dot(layer1_input, tparams[_p(prefix,'W1')].T) + tparams[_p(prefix,'b1')] ) # 64*200
y_recons = tensor.dot(hidden_2_out, tparams[_p(prefix,'V1')].T) + tparams[_p(prefix,'c1')]
#y_recons = tensor.tanh(y_recons) * 10 # avoid numerical issues/label smoothing
#y_recons = tensor.nnet.softmax(y_recons) # 64*2
y_recons = tensor.tanh(y_recons)
return y_recons
def mlp_layer_linear(tparams, layer1_input, prefix='mlp_layer'):
""" layer1_input: n_sample * n_feature 64*20
input_shape: (num of hiddens, number of input features) 200*20
pred_shape: (num of labels, number of hiddens) 2*200
y_recon : n_label *n_sample 2*64
"""
hidden_2_out = tensor.nnet.sigmoid(tensor.dot(layer1_input, tparams[_p(prefix,'W1')].T) + tparams[_p(prefix,'b1')] ) # 64*200
y_recons = tensor.dot(hidden_2_out, tparams[_p(prefix,'V1')].T) + tparams[_p(prefix,'c1')]
#y_recons = tensor.tanh(y_recons) * 10 # avoid numerical issues/label smoothing
#y_recons = tensor.nnet.softmax(y_recons) # 64*2
return y_recons
def middle_layer(tparams, layer1_input, prefix='mlp_layer'):
""" layer1_input: n_sample * n_feature 64*20
input_shape: (num of hiddens, number of input features) 200*20
pred_shape: (num of labels, number of hiddens) 2*200
y_recon : n_label *n_sample 2*64
"""
hidden_2_out = tensor.nnet.sigmoid(tensor.dot(layer1_input, tparams[_p(prefix,'W1')].T) + tparams[_p(prefix,'b1')] ) # 64*200
# y_recons = tensor.dot(hidden_2_out, tparams[_p(prefix,'V1')].T) + tparams[_p(prefix,'c1')] # avoid numerical issues
# y_recons = tensor.nnet.softmax(y_recons) # 64*2
return hidden_2_out
def param_init_decoder(options, params, prefix='decoder'):
n_x = options['n_x']
n_h = options['n_h']
n_z = options['n_z']
W = np.concatenate([uniform_weight(n_x,n_h),
uniform_weight(n_x,n_h),
uniform_weight(n_x,n_h),
uniform_weight(n_x,n_h)], axis=1)
params[_p(prefix, 'W')] = W
U = np.concatenate([ortho_weight(n_h),
ortho_weight(n_h),
ortho_weight(n_h),
ortho_weight(n_h)], axis=1)
params[_p(prefix, 'U')] = U
C = np.concatenate([uniform_weight(n_z,n_h),
uniform_weight(n_z,n_h),
uniform_weight(n_z,n_h),
uniform_weight(n_z,n_h)], axis=1)
params[_p(prefix,'C')] = C
params[_p(prefix,'b')] = zero_bias(4*n_h)
params[_p(prefix, 'b')][n_h:2*n_h] = 3*np.ones((n_h,)).astype(theano.config.floatX)
C0 = uniform_weight(n_z, n_h)
params[_p(prefix,'C0')] = C0
params[_p(prefix,'b0')] = zero_bias(n_h)
params[_p(prefix,'b_y')] = zero_bias(n_x) # 48
return params
def param_init_encoder(filter_shape, params, prefix='cnn_d'):
""" filter_shape: (number of filters, num input feature maps, filter height,
filter width)
image_shape: (batch_size, num input feature maps, image height, image width)
"""
W = np.asarray(rng.uniform(low=-0.01,high=0.01,size=filter_shape),dtype=theano.config.floatX)
b = np.zeros((filter_shape[0],), dtype=theano.config.floatX)
params[_p(prefix,'W')] = W
params[_p(prefix,'b')] = b
return params
def param_init_batch_norm(input_shape,params, prefix='cnn'):
""" input_shape: (num of hiddens, number of input features)
pred_shape: (num of labels, number of hiddens)
"""
beta = np.ones((input_shape[1],),dtype=theano.config.floatX) *0.01
gamma = np.ones((input_shape[1],),dtype=theano.config.floatX) *0.1
params[_p(prefix,'beta')] = beta
params[_p(prefix,'gamma')] = gamma
return params
def batch_norm(tparams, input, options, prefix='cnn'):
""" layer1_input: n_sample * n_feature 64*20
input_shape: (num of hiddens, number of input features) 200*20
pred_shape: (num of labels, number of hiddens) 2*200
y_recon : n_label *n_sample 2*64
"""
    input_hat = (input - input.mean(0)) / (input.std(0) + 1.0 / options['L'])
    input_ = input_hat * tparams[_p(prefix, 'gamma')] + tparams[_p(prefix, 'beta')]
return input_
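A NumPy equivalent of the normalization above (a sketch; options['L'] is assumed to be a constant whose reciprocal serves as the epsilon):

import numpy as np
x = np.random.randn(64, 20).astype('float32')      # n_sample * n_feature
gamma = np.full(20, 0.1, dtype='float32')            # as in param_init_batch_norm
beta = np.full(20, 0.01, dtype='float32')
L = 100.0                                             # assumed value of options['L']
x_hat = (x - x.mean(0)) / (x.std(0) + 1.0 / L)        # per-feature standardization
out = x_hat * gamma + beta                            # learned scale and shift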
def param_init_decoder(options, params, prefix='decoder_lstm'):
n_x = options['n_x']
n_h = options['n_h']
n_z = options['n_z']
W = np.concatenate([uniform_weight(n_x,n_h),
uniform_weight(n_x,n_h),
uniform_weight(n_x,n_h),
uniform_weight(n_x,n_h)], axis=1)
params[_p(prefix,'W')] = W
U = np.concatenate([ortho_weight(n_h),
ortho_weight(n_h),
ortho_weight(n_h),
ortho_weight(n_h)], axis=1)
params[_p(prefix,'U')] = U
#C = np.concatenate([uniform_weight(n_z,n_h),
# uniform_weight(n_z,n_h),
# uniform_weight(n_z,n_h),
# uniform_weight(n_z,n_h)], axis=1)
#params[_p(prefix,'C')] = C
params[_p(prefix,'b')] = zero_bias(4*n_h)
#params[_p(prefix, 'b')][n_h:2*n_h] = 3*np.ones((n_h,)).astype(theano.config.floatX)
C0 = uniform_weight(n_z, n_h)
params[_p(prefix,'C0')] = C0
params[_p(prefix,'b0')] = zero_bias(n_h)
return params
def decoder_layer(tparams, state_below, prefix='decoder_gru'):
""" state_below: size of n_steps * n_x
"""
n_steps = state_below.shape[0]
n_h = tparams[_p(prefix,'Ux')].shape[1]
state_belowx0 = tparams[_p(prefix, 'b0')]
h0vec = tensor.tanh(state_belowx0)
h0 = h0vec.dimshuffle('x',0)
def _slice(_x, n, dim):
return _x[n*dim:(n+1)*dim]
state_below_ = tensor.dot(state_below, tparams[_p(prefix, 'W')]) + tparams[_p(prefix, 'b')]
state_belowx = tensor.dot(state_below, tparams[_p(prefix, 'Wx')]) + tparams[_p(prefix, 'bx')]
def _step_slice(x_, xx_, h_, U, Ux):
preact = tensor.dot(h_, U)
preact += x_
r = tensor.nnet.sigmoid(_slice(preact, 0, n_h))
u = tensor.nnet.sigmoid(_slice(preact, 1, n_h))
preactx = tensor.dot(h_, Ux)
preactx = preactx * r
preactx = preactx + xx_
h = tensor.tanh(preactx)
h = u * h_ + (1. - u) * h
return h
seqs = [state_below_[:n_steps-1], state_belowx[:n_steps-1]]
_step = _step_slice
rval, updates = theano.scan(_step,
sequences=seqs,
outputs_info = [h0vec],
non_sequences = [tparams[_p(prefix, 'U')],
tparams[_p(prefix, 'Ux')]],
name=_p(prefix, '_layers'),
n_steps=n_steps-1)
#h0x = h0.dimshuffle('x',0,1)
return tensor.concatenate((h0,rval))
def gru_layer(tparams, state_below, init_state, options, prefix='gru', mask=None, **kwargs):
"""
Feedforward pass through GRU
"""
nsteps = state_below.shape[0]
if state_below.ndim == 3:
n_samples = state_below.shape[1]
else:
n_samples = 1
dim = tparams[_p(prefix,'Ux')].shape[1]
    if init_state is None:
        init_state = tensor.alloc(0., n_samples, dim)
    if mask is None:
        mask = tensor.alloc(1., state_below.shape[0], 1)
def _slice(_x, n, dim):
if _x.ndim == 3:
return _x[:, :, n*dim:(n+1)*dim]
return _x[:, n*dim:(n+1)*dim]
state_below_ = tensor.dot(state_below, tparams[_p(prefix, 'W')]) + tparams[_p(prefix, 'b')]
state_belowx = tensor.dot(state_below, tparams[_p(prefix, 'Wx')]) + tparams[_p(prefix, 'bx')]
U = tparams[_p(prefix, 'U')]
Ux = tparams[_p(prefix, 'Ux')]
def _step_slice(m_, x_, xx_, h_, U, Ux):
preact = tensor.dot(h_, U)
preact += x_
r = tensor.nnet.sigmoid(_slice(preact, 0, dim))
u = tensor.nnet.sigmoid(_slice(preact, 1, dim))
preactx = tensor.dot(h_, Ux)
preactx = preactx * r
preactx = preactx + xx_
h = tensor.tanh(preactx)
h = u * h_ + (1. - u) * h
h = m_[:,None] * h + (1. - m_)[:,None] * h_
return h
seqs = [mask, state_below_, state_belowx]
_step = _step_slice
rval, updates = theano.scan(_step,
sequences=seqs,
outputs_info = [init_state],
non_sequences = [tparams[_p(prefix, 'U')],
tparams[_p(prefix, 'Ux')]],
name=_p(prefix, '_layers'),
n_steps=nsteps,
profile=False,
strict=True)
rval = [rval]
return rval
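For checking purposes, a single step of the recurrence in _step_slice written in NumPy, ignoring the mask handling (a sketch; weight shapes follow param_init_gru, with U of shape (dim, 2*dim) and Ux of shape (dim, dim)):

import numpy as np

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

def gru_step(x_, xx_, h_, U, Ux, dim):
    preact = h_.dot(U) + x_                       # stacked reset/update pre-activations
    r = sigmoid(preact[:, :dim])                  # reset gate
    u = sigmoid(preact[:, dim:2 * dim])           # update gate
    h_tilde = np.tanh(h_.dot(Ux) * r + xx_)       # candidate state
    return u * h_ + (1.0 - u) * h_tilde           # same convex combination as above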