from collections import OrderedDict

import numpy
import theano


def init_params(options):
    params = OrderedDict()
    # embedding
    params['Wemb'] = norm_weight(options['n_words'], options['dim_word'])
    # encoder
    params = get_layer(options['encoder'])[0](options, params,
                                              prefix='encoder',
                                              nin=options['dim_word'],
                                              dim=options['dim'])
    # readout
    params = get_layer('ff')[0](options, params, prefix='ff_logit_lstm',
                                nin=options['dim'], nout=options['dim_word'],
                                ortho=False)
    params = get_layer('ff')[0](options, params, prefix='ff_logit_prev',
                                nin=options['dim_word'],
                                nout=options['dim_word'], ortho=False)
    params = get_layer('ff')[0](options, params, prefix='ff_logit',
                                nin=options['dim_word'],
                                nout=options['n_words'])
    return params
# build a training model
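# The snippets on this page rely on a few helpers that are not shown here.
# A minimal sketch of their conventional definitions in these Theano codebases
# (the exact implementations may differ per project):
def _p(pp, name):
    # join a prefix and a parameter name: _p('gru', 'W') -> 'gru_W'
    return '%s_%s' % (pp, name)


def ortho_weight(ndim):
    # square matrix with orthonormal columns, via SVD of a Gaussian sample
    W = numpy.random.randn(ndim, ndim)
    u, s, v = numpy.linalg.svd(W)
    return u.astype('float32')


def norm_weight(nin, nout=None, scale=0.01, ortho=True):
    # scaled Gaussian weights; falls back to an orthogonal matrix when square
    if nout is None:
        nout = nin
    if nout == nin and ortho:
        W = ortho_weight(nin)
    else:
        W = scale * numpy.random.randn(nin, nout)
    return W.astype('float32')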
def param_init_gru(options, params, prefix='gru', nin=None, dim=None):
    """
    Gated Recurrent Unit (GRU)
    """
    if nin is None:
        nin = options['dim_proj']
    if dim is None:
        dim = options['dim_proj']
    # reset and update gates, stacked along the second axis
    W = numpy.concatenate([norm_weight(nin, dim),
                           norm_weight(nin, dim)], axis=1)
    params[_p(prefix, 'W')] = W
    params[_p(prefix, 'b')] = numpy.zeros((2 * dim,)).astype('float32')
    U = numpy.concatenate([ortho_weight(dim),
                           ortho_weight(dim)], axis=1)
    params[_p(prefix, 'U')] = U
    # candidate hidden state
    Wx = norm_weight(nin, dim)
    params[_p(prefix, 'Wx')] = Wx
    Ux = ortho_weight(dim)
    params[_p(prefix, 'Ux')] = Ux
    params[_p(prefix, 'bx')] = numpy.zeros((dim,)).astype('float32')
    return params
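# Because the gate matrices are stacked, the forward pass slices each gate back
# out of one big preactivation. A sketch of the slicing helper these codebases
# conventionally use (assumed here, since the fprop is not part of the snippet):
def _slice(_x, n, dim):
    # return the n-th block of width `dim` along the last axis
    if _x.ndim == 3:
        return _x[:, :, n * dim:(n + 1) * dim]
    return _x[:, n * dim:(n + 1) * dim]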
def init_params(options):
    """
    Initialize all parameters
    """
    params = OrderedDict()
    # Word embedding
    params['Wemb'] = norm_weight(options['n_words'], options['dim_word'])
    # Encoder
    params = get_layer(options['encoder'])[0](options, params, prefix='encoder',
                                              nin=options['dim_word'], dim=options['dim'])
    # Decoder: next sentence
    params = get_layer(options['decoder'])[0](options, params, prefix='decoder_f',
                                              nin=options['dim_word'], dim=options['dim'])
    # Decoder: previous sentence
    params = get_layer(options['decoder'])[0](options, params, prefix='decoder_b',
                                              nin=options['dim_word'], dim=options['dim'])
    # Output layer
    params = get_layer('ff')[0](options, params, prefix='ff_logit',
                                nin=options['dim'], nout=options['n_words'])
    return params
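# get_layer() maps a layer name to its (parameter initializer, forward function)
# pair. A minimal sketch of the registry these snippets assume; the forward
# functions (gru_layer, lstm_layer, ...) are not shown on this page:
layers = {'ff': ('param_init_fflayer', 'fflayer'),
          'gru': ('param_init_gru', 'gru_layer'),
          'lstm': ('param_init_lstm', 'lstm_layer'),
          'lstm_cond': ('param_init_lstm_cond', 'lstm_cond_layer')}


def get_layer(name):
    # returns (param_init_fn, layer_fn) for the requested layer type
    fns = layers[name]
    return (eval(fns[0]), eval(fns[1]))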
def param_init_lstm(self, params, nin, dim, prefix='lstm'):
    assert prefix is not None
    # Stack the weight matrices for faster dot products
    W = numpy.concatenate([norm_weight(nin, dim),
                           norm_weight(nin, dim),
                           norm_weight(nin, dim),
                           norm_weight(nin, dim)], axis=1)
    params[_p(prefix, 'W')] = W
    U = numpy.concatenate([ortho_weight(dim),
                           ortho_weight(dim),
                           ortho_weight(dim),
                           ortho_weight(dim)], axis=1)
    params[_p(prefix, 'U')] = U
    params[_p(prefix, 'b')] = numpy.zeros((4 * dim,)).astype('float32')
    return params
# This function implements the lstm fprop
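# The fprop referred to in the comment above is not included in the snippet; the
# real version is a Theano scan step. As an illustration only, a numpy sketch of
# one LSTM step consuming the stacked parameters (gate order i, f, o, c is the
# usual convention in these codebases):
def sigmoid(x):
    return 1.0 / (1.0 + numpy.exp(-x))


def lstm_step_np(x_t, h_prev, c_prev, W, U, b, dim):
    preact = numpy.dot(x_t, W) + numpy.dot(h_prev, U) + b
    i = sigmoid(preact[:, 0 * dim:1 * dim])         # input gate
    f = sigmoid(preact[:, 1 * dim:2 * dim])         # forget gate
    o = sigmoid(preact[:, 2 * dim:3 * dim])         # output gate
    c_new = numpy.tanh(preact[:, 3 * dim:4 * dim])  # candidate cell
    c = f * c_prev + i * c_new
    h = o * numpy.tanh(c)
    return h, c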
def param_init_fflayer(options, params, prefix='ff', nin=None, nout=None, ortho=True):
    """
    Affine transformation + point-wise nonlinearity
    """
    if nin is None:
        nin = options['dim_proj']
    if nout is None:
        nout = options['dim_proj']
    params[_p(prefix, 'W')] = norm_weight(nin, nout, ortho=ortho)
    params[_p(prefix, 'b')] = numpy.zeros((nout,)).astype('float32')
    return params
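# The forward pass matching param_init_fflayer is a single affine map followed
# by a nonlinearity. A sketch of the usual definition (the activation is passed
# as a string and eval'd, which is how these codebases select it):
import theano.tensor as tensor


def fflayer(tparams, state_below, options, prefix='ff',
            activ='lambda x: tensor.tanh(x)', **kwargs):
    return eval(activ)(tensor.dot(state_below, tparams[_p(prefix, 'W')]) +
                       tparams[_p(prefix, 'b')])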
def param_init_fflayer(self, options, params, prefix='ff', nin=None, nout=None):
    if nin is None:
        nin = options['dim_proj']
    if nout is None:
        nout = options['dim_proj']
    params[_p(prefix, 'W')] = norm_weight(nin, nout, scale=0.01)
    params[_p(prefix, 'b')] = numpy.zeros((nout,)).astype('float32')
    return params
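# The numpy parameter dict produced by the param_init_* functions is typically
# wrapped into Theano shared variables before the computation graph is built.
# A sketch of the conventional helper:
def init_tparams(params):
    tparams = OrderedDict()
    for kk, pp in params.items():
        tparams[kk] = theano.shared(pp, name=kk)
    return tparams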
def init_params(self, options):
    # all parameters
    params = OrderedDict()
    # embedding
    params['Wemb'] = utils.norm_weight(options['n_words'], options['dim_word'])
    ctx_dim = options['ctx_dim']
    # initial state / memory of the decoder LSTM, predicted from the context
    params = self.layers.get_layer('ff')[0](
        options, params, prefix='ff_state', nin=ctx_dim, nout=options['dim'])
    params = self.layers.get_layer('ff')[0](
        options, params, prefix='ff_memory', nin=ctx_dim, nout=options['dim'])
    # decoder: LSTM
    params = self.layers.get_layer('lstm_cond')[0](options, params, prefix='bo_lstm',
                                                   nin=options['dim_word'], dim=options['dim'],
                                                   dimctx=ctx_dim)
    params = self.layers.get_layer('lstm')[0](params, nin=options['dim'], dim=options['dim'],
                                              prefix='to_lstm')
    # readout
    params = self.layers.get_layer('ff')[0](
        options, params, prefix='ff_logit_bo',
        nin=options['dim'], nout=options['dim_word'])
    if options['ctx2out']:
        params = self.layers.get_layer('ff')[0](
            options, params, prefix='ff_logit_ctx',
            nin=ctx_dim, nout=options['dim_word'])
    params = self.layers.get_layer('ff')[0](
        options, params, prefix='ff_logit_to',
        nin=options['dim'], nout=options['dim_word'])
    params = self.layers.get_layer('ff')[0](
        options, params, prefix='ff_logit',
        nin=options['dim_word'], nout=options['n_words'])
    return params
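# A hypothetical options dict, only to make the shapes produced above concrete
# (the values are illustrative, not the original project's configuration):
options = {'n_words': 10000, 'dim_word': 512, 'dim': 512,
           'ctx_dim': 1024, 'ctx2out': True, 'selector': True}
# After init_params(options), a few representative shapes:
#   Wemb         : (10000, 512)   word embeddings
#   ff_state_W   : (1024, 512)    context -> initial LSTM state
#   bo_lstm_W    : (512, 2048)    word input -> 4 stacked LSTM gates
#   ff_logit_W   : (512, 10000)   word-space readout -> vocabulary logits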
def __init__(self, shape, name):
    # shape = (input dim, hidden dim, context dim)
    # input to LSTM; as above, we stack the matrices for compactness, do one
    # dot product, and use the slice function to get the activations for each "gate"
    self.W = theano.shared(numpy.concatenate(
        [utils.norm_weight(shape[0], shape[1]),
         utils.norm_weight(shape[0], shape[1]),
         utils.norm_weight(shape[0], shape[1]),
         utils.norm_weight(shape[0], shape[1])
         ], axis=1), name=name + "_W")
    # LSTM to LSTM
    self.U = theano.shared(numpy.concatenate(
        [utils.ortho_weight(shape[1]),
         utils.ortho_weight(shape[1]),
         utils.ortho_weight(shape[1]),
         utils.ortho_weight(shape[1])
         ], axis=1), name=name + "_U")
    # bias to LSTM
    self.b = theano.shared(numpy.zeros((4 * shape[1],)).astype('float32'), name=name + "_b")
    # context to LSTM
    self.Wc = theano.shared(utils.norm_weight(shape[2], 4 * shape[1]), name=name + "_Wc")
    # attention: context -> hidden
    self.Wc_att = theano.shared(utils.norm_weight(shape[2], ortho=False), name=name + "_Wc_att")
    # attention: LSTM -> hidden
    self.Wd_att = theano.shared(utils.norm_weight(shape[1], shape[2]), name=name + "_Wd_att")
    # attention: hidden bias
    self.b_att = theano.shared(numpy.zeros((shape[2],)).astype('float32'), name=name + "_b_att")
    # optional "deep" attention
    self.W_att_1 = theano.shared(utils.ortho_weight(shape[2]), name=name + "_W_att_1")
    self.b_att_1 = theano.shared(numpy.zeros((shape[2],)).astype('float32'), name=name + "_b_att_1")
    # attention: hidden -> score
    self.U_att = theano.shared(utils.norm_weight(shape[2], 1), name=name + "_U_att")
    self.c_att = theano.shared(numpy.zeros((1,)).astype('float32'), name=name + "_c_att")
    # attention: selector
    self.W_sel = theano.shared(utils.norm_weight(shape[1], 1), name=name + "_W_sel")
    self.b_sel = theano.shared(numpy.float32(0.), name=name + "_b_sel")
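# For training, the shared variables above are typically gathered into one flat
# list so gradients can be taken with respect to all of them. The class on this
# page does not show how it does that; a minimal sketch (the method name is
# assumed, the attribute list mirrors the fields created in __init__):
def get_params(self):
    return [self.W, self.U, self.b, self.Wc,
            self.Wc_att, self.Wd_att, self.b_att,
            self.W_att_1, self.b_att_1,
            self.U_att, self.c_att,
            self.W_sel, self.b_sel]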
def param_init_lstm_cond(self, options, params,
                         prefix='lstm_cond', nin=None, dim=None, dimctx=None):
    if nin is None:
        nin = options['dim']
    if dim is None:
        dim = options['dim']
    if dimctx is None:
        dimctx = options['dim']
    # input to LSTM
    W = numpy.concatenate([norm_weight(nin, dim),
                           norm_weight(nin, dim),
                           norm_weight(nin, dim),
                           norm_weight(nin, dim)], axis=1)
    params[_p(prefix, 'W')] = W
    # LSTM to LSTM
    U = numpy.concatenate([ortho_weight(dim),
                           ortho_weight(dim),
                           ortho_weight(dim),
                           ortho_weight(dim)], axis=1)
    params[_p(prefix, 'U')] = U
    # bias to LSTM
    params[_p(prefix, 'b')] = numpy.zeros((4 * dim,)).astype('float32')
    # context to LSTM
    # Wc = norm_weight(dimctx, dim * 4)
    # params[_p(prefix, 'Wc')] = Wc
    # attention: context -> hidden
    Wc_att = norm_weight(dimctx, ortho=False)
    params[_p(prefix, 'Wc_att')] = Wc_att
    # attention: LSTM -> hidden
    Wd_att = norm_weight(dim, dimctx)
    params[_p(prefix, 'Wd_att')] = Wd_att
    # attention: hidden bias
    b_att = numpy.zeros((dimctx,)).astype('float32')
    params[_p(prefix, 'b_att')] = b_att
    # attention: hidden -> score
    U_att = norm_weight(dimctx, 1)
    params[_p(prefix, 'U_att')] = U_att
    c_att = numpy.zeros((1,)).astype('float32')
    params[_p(prefix, 'c_tt')] = c_att
    if options['selector']:
        # attention: selector
        W_sel = norm_weight(dim, 1)
        params[_p(prefix, 'W_sel')] = W_sel
        b_sel = numpy.float32(0.)
        params[_p(prefix, 'b_sel')] = b_sel
    return params
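# How the attention parameters above fit together: the context is projected by
# Wc_att/b_att, the LSTM state by Wd_att, and U_att/c_tt turn the combined
# activation into one score per context location. A numpy sketch of the scoring
# step, as an illustration only (the real computation lives in the Theano scan
# step of the conditional LSTM):
def attention_weights(context, h, Wc_att, b_att, Wd_att, U_att, c_tt):
    # context: (n_locations, dimctx), h: (dim,)
    pctx = numpy.dot(context, Wc_att) + b_att     # project the context
    pstate = numpy.dot(h, Wd_att)                 # project the LSTM state
    pctx = numpy.tanh(pctx + pstate[None, :])
    e = numpy.dot(pctx, U_att)[:, 0] + c_tt[0]    # unnormalized scores
    alpha = numpy.exp(e - e.max())
    return alpha / alpha.sum()                    # softmax over locations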