def __init__(self, input_size, output_size):
super(Encoder, self).__init__(
x_f = links.LSTM(input_size, output_size),
x_b = links.LSTM(input_size, output_size),
f_y = links.Linear(output_size, output_size),
b_y = links.Linear(output_size, output_size),
)
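# The constructor above registers a forward LSTM (x_f), a backward LSTM (x_b)
# and two output projections (f_y, b_y). A minimal sketch of how such a pair
# might be driven over a token sequence; the encode() helper and the way the
# two final states are combined are assumptions, not the original class's method.
import chainer.functions as F

def encode(enc, xs):
    """Run xs (a list of per-timestep (batch, input_size) arrays) through both
    LSTMs and sum the projected final states. Illustrative only."""
    enc.x_f.reset_state()
    enc.x_b.reset_state()
    for x in xs:                # forward direction
        h_f = enc.x_f(x)
    for x in reversed(xs):      # backward direction
        h_b = enc.x_b(x)
    return enc.f_y(h_f) + enc.b_y(h_b)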
def __init__(self, n_words, n_cwords, n_memory, n_output):
self.n_words = n_words
self.n_cwords = n_cwords
self.n_memory = n_memory
self.n_output = n_output
super().__init__(
input=cl.EmbedID(self.n_words, self.n_cwords),
memory=cl.LSTM(self.n_cwords, self.n_memory),
output=cl.Linear(self.n_memory, self.n_output)
)
def __init__(self, n_input_units=1000, n_vocab=100, n_units=100, train=True):
super(RNNLM, self).__init__(
inputVector= L.Linear(n_input_units, n_units),
embed=L.EmbedID(n_vocab, n_units),
l1=L.LSTM(n_units, n_units),
l2=L.LSTM(n_units, n_units),
l3=L.Linear(n_units, n_vocab),
)
self.train = train
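# A minimal sketch of one step through this RNNLM, assuming word-id input via
# embed (the inputVector path would feed a dense feature through the same LSTMs
# instead). The one_step() helper and the dropout placement are assumptions.
import numpy as np
import chainer.functions as F

def one_step(model, word_id):
    """Embed one token, pass it through both LSTM layers, and return
    unnormalized vocabulary scores. Illustrative only."""
    x = model.embed(model.xp.asarray([word_id], dtype=np.int32))
    h1 = model.l1(F.dropout(x))      # dropout ratio taken from chainer.config (v2-style)
    h2 = model.l2(F.dropout(h1))
    return model.l3(h2)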
def __init__(self, vocab, args):
def get_initialW_X(shape):
return np.random.normal(0, (2.0/(sum(shape)))**0.5, shape).astype(np.float32)
super(DERN, self).__init__(
# Word Embedding
embed=L.EmbedID(len(vocab), args.n_units),
# bi-LSTMs
f_LSTM=L.LSTM(args.n_units, args.n_units), # for article
b_LSTM=L.LSTM(args.n_units, args.n_units),
Q_f_LSTM=L.LSTM(args.n_units, args.n_units), # for query
Q_b_LSTM=L.LSTM(args.n_units, args.n_units),
# Matrices and vectors
W_hd=L.Linear(4*args.n_units, args.n_units, initialW=get_initialW_X((args.n_units, 4*args.n_units))),
W_dm=L.Linear(args.n_units, args.n_units, initialW=get_initialW_X((args.n_units, args.n_units))),
m=L.Linear(args.n_units, 1, initialW=get_initialW_X((1, args.n_units))),
W_hq=L.Linear(4 * args.n_units, args.n_units, initialW=get_initialW_X((args.n_units, 4*args.n_units))),
W_hu=L.Linear(4 * args.n_units, args.n_units, initialW=get_initialW_X((args.n_units, 4*args.n_units))),
W_dv=L.Linear(args.n_units, args.n_units, initialW=get_initialW_X((args.n_units, args.n_units))),
W_dx=L.Linear(args.n_units, args.n_units, initialW=get_initialW_X((args.n_units, args.n_units))),
W_dxQ=L.Linear(args.n_units, args.n_units, initialW=get_initialW_X((args.n_units, args.n_units))),
b_v2=L.Linear(1, args.n_units, initialW=get_initialW_X((args.n_units, 1)))
)
self.args = args
self.n_vocab = len(vocab)
self.n_units = args.n_units
self.dropout_ratio = args.d_ratio
self.PH_id = vocab["@placeholder"]
self.eos_id = vocab["<eos>"]
self.bos_id = vocab["<bos>"]
self.boq_id = vocab["<boq>"]
self.BOQ_tok_batch = self.xp.array([self.boq_id], dtype=np.int32)
self.NULL_id = vocab["NULL_tok"]
self.NULL_tok = self.xp.array(self.NULL_id, dtype=np.int32)
self.initialize_additionally()
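# get_initialW_X above draws weights from N(0, 2 / (fan_in + fan_out)), i.e. a
# Glorot-style normal initialisation. Recent Chainer exposes the same idea as an
# initializer object; a rough equivalent for one of the links (sizes assumed):
import chainer.links as L
from chainer import initializers

n_units = 256  # example size
W_hd = L.Linear(4 * n_units, n_units, initialW=initializers.GlorotNormal())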
def encode_tokens(self, x_datas, i2sD, train=True):
# Embed, dropout, split into each token (batchsize=1)
h0L = list(F.split_axis(
F.dropout(
self.embed(chainer.Variable(self.xp.array(x_datas, dtype=np.int32), volatile=not train)),
ratio=self.dropout_ratio, train=train), len(x_datas), axis=0))
# Replace embedding with dynamic entity representation
for i in i2sD.keys():
h0L[i] = self.W_dx(i2sD[i])
# LSTM. forward order
forward_outL = []
self.f_LSTM.reset_state()
for h0 in h0L:
state = self.f_LSTM(h0)
forward_outL.append(state)
# LSTM. backward order
backward_outL = []
self.b_LSTM.reset_state()
for h0 in reversed(h0L):
state = self.b_LSTM(h0)
backward_outL.append(state)
return forward_outL, backward_outL
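# encode_tokens() returns the forward states in sentence order and the backward
# states in reversed order. A common way to get one bidirectional vector per
# token is to realign the backward list first; this helper is a sketch, not part
# of the original class.
import chainer.functions as F

def combine_bidirectional(forward_outL, backward_outL):
    """Concatenate forward state i with the backward state of the same token."""
    backward_aligned = list(reversed(backward_outL))
    return [F.concat((f, b), axis=1)
            for f, b in zip(forward_outL, backward_aligned)]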
def __init__(
self, g_size=8, n_steps=6, n_scales=1, var=0.03, use_lstm=False
):
d_glm = 128
d_core = 256
super(RAM, self).__init__(
emb_l=L.Linear(2, d_glm),
emb_x=L.Linear(g_size*g_size*n_scales, d_glm),
fc_lg=L.Linear(d_glm, d_core),
fc_xg=L.Linear(d_glm, d_core),
fc_ha=L.Linear(d_core, 10),
fc_hl=L.Linear(d_core, 2),
fc_hb=L.Linear(d_core, 1),
)
if use_lstm:
self.add_link(name='core_lstm', link=L.LSTM(d_core, d_core))
else:
self.add_link(name='core_hh', link=L.Linear(d_core, d_core))
self.add_link(name='core_gh', link=L.Linear(d_core, d_core))
self.use_lstm = use_lstm
self.d_core = d_core
self.g_size = g_size
self.n_steps = n_steps
self.n_scales = n_scales
self.var = var
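# A sketch of how the recurrent core registered above might be updated from a
# glimpse: combine location and glimpse embeddings, then either step the LSTM
# core or apply the plain recurrent core. The core_step() helper, the ReLU
# choices and the argument names are assumptions.
import chainer.functions as F

def core_step(ram, h_prev, loc, glimpse):
    """One update of the RAM core state. Illustrative only."""
    g = F.relu(ram.fc_lg(F.relu(ram.emb_l(loc))) +
               ram.fc_xg(F.relu(ram.emb_x(glimpse))))
    if ram.use_lstm:
        return ram.core_lstm(g)                            # stateful LSTM core
    return F.relu(ram.core_hh(h_prev) + ram.core_gh(g))    # plain RNN core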
def __call__(self, opt):
self.norm_orig = np.sqrt(chainer.optimizer._sum_sqnorm(
[p.grad for p in opt.target.params()]))
self.norm = self.norm_orig
self.rate = self.threshold / self.norm_orig
if self.rate < 1:
for param in opt.target.params():
grad = param.grad
with cuda.get_device(grad):
grad *= self.rate
self.norm = self.threshold
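# The hook above rescales all gradients whenever their global L2 norm exceeds
# self.threshold, keeping norm_orig / norm around for logging. A sketch of
# registering such a hook on an optimizer; the model and threshold here are
# placeholders, and Chainer's built-in chainer.optimizer.GradientClipping
# performs the same rescaling without the logging fields.
import chainer
import chainer.links as L

model = L.Classifier(L.Linear(10, 2))   # any chainer.Chain works here
optimizer = chainer.optimizers.Adam()
optimizer.setup(model)
optimizer.add_hook(chainer.optimizer.GradientClipping(5.0))  # rescale grads whose norm exceeds 5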
# Reset the hidden state and memory cell of every LSTM layer in this chain.
# Mainly used to (re)initialize the decoder-side LSTM before a new sequence.
def reset_state(self):
for layer in self:
layer.reset_state()
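# The loop "for layer in self" implies this reset_state() lives on a
# chainer.ChainList holding stateful L.LSTM links. A minimal sketch of such a
# stacked-LSTM container (the class name and sizes are assumptions):
import chainer
import chainer.links as L

class StackedLSTM(chainer.ChainList):
    """A stack of stateful LSTM layers. Illustrative container."""
    def __init__(self, n_layers, n_units):
        super(StackedLSTM, self).__init__(
            *[L.LSTM(n_units, n_units) for _ in range(n_layers)])

    def __call__(self, x):
        for layer in self:
            x = layer(x)
        return x

    def reset_state(self):
        for layer in self:
            layer.reset_state()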
# Used by both the encoder and the decoder (and during beam search):
# runs the multi-layer LSTM over a whole minibatch of sequences at once.
def __call__(self, hx, cx, xs, flag_train, args):
if hx is None:
hx = self.init_hx(xs)
if cx is None:
cx = self.init_hx(xs)
# hx, cx are tensors of shape (n_layers, minibatch size, unit dim)
# xs is a tensor of shape (sequence length, minibatch size, unit dim)
# Note: chaFunc.n_step_lstm() applies the given dropout ratio between layers itself
if args.chainer_version_check[0] == 2:
hy, cy, ys = chaFunc.n_step_lstm(
self.n_layers, self.dropout_rate, hx, cx, self.ws, self.bs, xs)
else:
hy, cy, ys = chaFunc.n_step_lstm(
self.n_layers, self.dropout_rate, hx, cx, self.ws, self.bs, xs,
train=flag_train, use_cudnn=self.use_cudnn)
# hy, cy have shape (n_layers, minibatch size, unit dim)
# ys is a list with one element per time step,
# each element of shape (minibatch size, unit dim);
# here they are stacked into a single chainer.Variable,
# i.e. a tensor of shape (sequence length, minibatch size, unit dim)
hlist = chaFunc.stack(ys)
return hy, cy, hlist
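# chaFunc.n_step_lstm() expects per-layer lists of eight weight matrices and
# eight biases (self.ws, self.bs). In current Chainer the link L.NStepLSTM owns
# those parameters itself; a rough stand-alone equivalent of the Chainer-v2
# branch above, with layer count, sizes and dummy inputs assumed:
import numpy as np
import chainer.functions as F
import chainer.links as L

nstep = L.NStepLSTM(n_layers=2, in_size=256, out_size=256, dropout=0.3)
xs = [np.random.rand(5, 256).astype(np.float32) for _ in range(3)]  # 3 sequences of length 5
hy, cy, ys = nstep(None, None, xs)  # hy, cy: (n_layers, batch, out_size); ys: one Variable per sequence
hlist = F.stack(ys)                 # valid here because all sequences share the same length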
def __init__(self, input_num, action_num, max_buff_size, m, e):
print("RMQN Model", input_num, action_num)
super(RMQN, self).__init__(
memory_module = MemoryModule(max_buff_size=max_buff_size, m=m, e=e),
encoder=L.Linear(in_size=input_num, out_size=e),
context=L.LSTM(in_size=e, out_size=m),
quality=QualityPhi(m, action_num),
)
def __init__(self, input_num, action_num, max_buff_size, m, e):
assert(m == e)
print("FRMQN Model", input_num, action_num)
super(FRMQN, self).__init__(
memory_module = MemoryModule(max_buff_size=max_buff_size, m=m, e=e),
encoder=L.Linear(in_size=input_num, out_size=e),
context=L.LSTM(in_size=(e+m), out_size=m),
quality=QualityPhi(m, action_num),
)
self.o = None
def __init__(self, input_num, action_num):
print("DRQN Model", input_num, action_num)
super(DRQN, self).__init__(
fc1=L.Linear(input_num, 256),
lstm=L.LSTM(256, 256),
fc2=L.Linear(256, action_num),
)
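# A sketch of a forward pass through this DRQN: fully connected layer, stateful
# LSTM, then one Q-value per action. The q_values() helper and the ReLU are
# assumptions, not the original model's __call__.
import chainer.functions as F

def q_values(model, obs):
    """obs: (batch, input_num) float32 array. Illustrative only."""
    h = F.relu(model.fc1(obs))
    h = model.lstm(h)        # carries state across successive calls
    return model.fc2(h)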
def __init__(self, n_vocab, n_units, train=True):
super(charRNN, self).__init__(
embed=L.EmbedID(n_vocab, n_units),
l1=L.LSTM(n_units, n_units),
l2=L.LSTM(n_units, n_units),
l3=L.Linear(n_units, n_vocab),
)
for param in self.params():
param.data[...] = np.random.uniform(-0.1, 0.1, param.data.shape)
self.train = train
def __init__(self, n_layer, n_unit, n_vocab):
super(AttentionNet, self).__init__(
l1 = L.Linear(n_unit, n_unit),
l2 = L.Linear(n_unit, n_unit),
fnn = L.Linear(n_unit, 1),
lstm = L.LSTM(n_unit, n_unit),
dec = L.Linear(n_unit, n_vocab),
)
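# Given the registered links, the attention weights are presumably computed in
# additive (Bahdanau-style) form: project encoder and decoder states with
# l1 / l2, squash, and score with fnn. The helper below sketches that scoring
# step only; the softmax over positions and the use of lstm / dec are omitted.
import chainer.functions as F

def attention_score(net, enc_h, dec_h):
    """Unnormalized attention score for one encoder state. Illustrative only."""
    return net.fnn(F.tanh(net.l1(enc_h) + net.l2(dec_h)))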
def __init__(self, word_num, feature_num, hidden_num):
super(ImageCaption, self).__init__(
word_vec = L.EmbedID(word_num, hidden_num),
image_vec = L.Linear(feature_num, hidden_num),
lstm = L.LSTM(hidden_num, hidden_num),
out_word = L.Linear(hidden_num, word_num),
)
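# A sketch of how this caption model is typically driven: feed the image feature
# once to prime the LSTM, then feed word ids step by step. Both helpers and the
# priming scheme are assumptions, not the original class's methods.
import numpy as np

def prime_with_image(model, feature):
    """feature: (batch, feature_num) float32 array."""
    model.lstm.reset_state()
    model.lstm(model.image_vec(feature))

def caption_step(model, word_id):
    """Return unnormalized vocabulary scores for the next word."""
    x = model.word_vec(model.xp.asarray([word_id], dtype=np.int32))
    h = model.lstm(x)
    return model.out_word(h)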
def __init__(self, vocaburary_size, img_feature_dim=2048, hidden_dim=512, dropout_ratio=0.5, train=True):
self.dropout_ratio = dropout_ratio
super(Image2CaptionDecoderOld, self).__init__(
embed_word= L.EmbedID(vocaburary_size, hidden_dim),
embed_image= L.Linear(img_feature_dim, hidden_dim),
lstm = L.LSTM(hidden_dim, hidden_dim),
decode_word = L.Linear(hidden_dim, vocaburary_size),
)
self.train = train
def encodeSentenceFWD(self, train_mode, sentence, args, dropout_rate):
if args.gpu_enc != args.gpu_dec:  # encoder and decoder are on different GPUs
chainer.cuda.get_device(args.gpu_enc).use()
encLen = len(sentence)  # sentence length
cMBSize = len(sentence[0]) # minibatch size
# look up the encoder input embeddings for the sentence
encEmbList = self.getEncoderInputEmbeddings(sentence, args)
flag_train = (train_mode > 0)
lstmVars = [0] * self.n_layers * 2
if self.flag_merge_encfwbw == 0:  # run the forward and backward LSTMs independently over all layers
hyf, cyf, fwHout = self.model.encLSTM_f(
None, None, encEmbList, flag_train, args)  # forward direction
hyb, cyb, bkHout = self.model.encLSTM_b(
None, None, encEmbList[::-1], flag_train, args)  # backward direction
for z in six.moves.range(self.n_layers):
lstmVars[2 * z] = cyf[z] + cyb[z]
lstmVars[2 * z + 1] = hyf[z] + hyb[z]
elif self.flag_merge_encfwbw == 1:  # merge forward and backward outputs at every layer
sp = (cMBSize, self.hDim)
for z in six.moves.range(self.n_layers):
if z == 0:  # first layer: use the embeddings as input
biH = encEmbList
else:  # deeper layers: use the previous layer's outputs as input
# note: bkHout is stored in reversed order, so flip it back before adding
biH = fwHout + bkHout[::-1]
# forward LSTM for layer z
hyf, cyf, fwHout = self.model.encLSTM_f(
z, biH, flag_train, dropout_rate, args)
# backward LSTM for layer z
hyb, cyb, bkHout = self.model.encLSTM_b(
z, biH[::-1], flag_train, dropout_rate, args)
# sum the forward and backward cell/hidden states of this layer
# and reshape them for later use as decoder initial states
lstmVars[2 * z] = chaFunc.reshape(cyf + cyb, sp)
lstmVars[2 * z + 1] = chaFunc.reshape(hyf + hyb, sp)
else:
assert 0, "ERROR"
# build the final encoder outputs
if self.flag_enc_boseos == 0: # default
# sum the forward outputs and the re-reversed backward outputs at every position
biHiddenStack = fwHout[:, ] + bkHout[::-1]
elif self.flag_enc_boseos == 1:
bkHout2 = bkHout[::-1]  # back to sentence order
biHiddenStack = fwHout[1:encLen - 1, ] + bkHout2[1:encLen - 1, ]
# drop the BOS and EOS positions  TODO: guard against the remaining length becoming 0 here
encLen -= 2
else:
assert 0, "ERROR"
# swap (enc length, minibatch size, hidden dim)
# => (minibatch size, enc length, hidden dim)
biHiddenStackSW01 = chaFunc.swapaxes(biHiddenStack, 0, 1)
# stack the per-layer LSTM states so they can initialize the decoder LSTM
lstmVars = chaFunc.stack(lstmVars)
# pack the encoder outputs into an encInfoObject and return it
retO = self.encInfoObject(biHiddenStackSW01, lstmVars, encLen, cMBSize)
return retO
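# lstmVars stacks, for each encoder layer z, the summed cell state at index 2*z
# and the summed hidden state at index 2*z + 1. A small sketch of unpacking
# them, e.g. to initialize a decoder-side LSTM (the helper is an assumption):
def split_encoder_states(lstmVars, n_layers):
    """Recover per-layer (cell, hidden) pairs from the stacked encoder states."""
    cells = [lstmVars[2 * z] for z in range(n_layers)]
    hiddens = [lstmVars[2 * z + 1] for z in range(n_layers)]
    return cells, hiddens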