def predict(self, xs):
"""
batch: list of splitted sentences
"""
batchsize = len(xs)
fs = [self.extractor.process(x)[:2] for x in xs]
ws, cs = concat_examples(fs, padding=IGNORE)
cat_ys, dep_ys = self.forward(ws, cs)
cat_ys = F.transpose(F.stack(cat_ys, 2), (0, 2, 1))
# dep_ys = F.transpose(F.stack(dep_ys, 2), (0, 2, 1))
cat_ys = [F.log_softmax(
F.reshape(y, (y.shape[1], -1))[1:len(x) + 1]).data for x, y in \
zip(xs, F.split_axis(cat_ys, batchsize, 0))]
dep_ys = [F.log_softmax(y[1:len(x) + 1, :len(x) + 1]).data \
for x, y in zip(xs, dep_ys)]
assert len(cat_ys) == len(dep_ys)
return zip(cat_ys, dep_ys)
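
# A minimal sketch (not from the original source) of the tensor plumbing in
# predict() above: stack per-token score matrices, go batch-major, split per
# sentence, and log-normalize. All shapes below are made up.
import numpy as np
import chainer.functions as F

T, B, C = 6, 2, 10                                     # tokens, batch, categories
per_step = [np.random.randn(B, C).astype(np.float32) for _ in range(T)]
scores = F.transpose(F.stack(per_step, 2), (0, 2, 1))  # (B, C, T) -> (B, T, C)
rows = F.split_axis(scores, B, 0)                      # B chunks of shape (1, T, C)
y0 = F.log_softmax(F.reshape(rows[0], (T, C)))         # per-token log-probabilities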
def forward(self, ws, cs):
batchsize, length, max_word_len = cs.shape
ws = self.emb_word(ws) # (batch, length, word_dim)
cs = F.reshape(
F.max_pooling_2d(
self.conv_char(
F.reshape(
self.emb_char(cs),
(batchsize * length, 1, max_word_len, 50))), (max_word_len, 1)),
(batchsize, length, self.char_dim))
hs = F.transpose(F.concat([ws, cs], 2), (1, 0, 2))
hs = F.dropout(hs, self.dropout_ratio, train=self.train)
hs = F.split_axis(hs, length, 0)
hs_f = []
hs_b = []
self._init_state()
for h_in_f, h_in_b in zip(hs, reversed(hs)):
h_f = self.lstm_f2(self.lstm_f1(F.reshape(h_in_f, (batchsize, -1))))
hs_f.append(h_f)
h_b = self.lstm_b2(self.lstm_b1(F.reshape(h_in_b, (batchsize, -1))))
hs_b.append(h_b)
hs = [F.concat([h_f, h_b]) for h_f, h_b in zip(hs_f, reversed(hs_b))]
cat_ys = [self.linear_cat2(F.dropout(
F.elu(self.linear_cat1(h)), 0.5, train=self.train)) for h in hs]
hs = [F.reshape(h, (length, -1)) for h in \
F.split_axis(F.transpose(F.stack(hs, 2), (0, 2, 1)), batchsize, 0)]
dep_ys = [self.biaffine(
F.relu(F.dropout(self.linear_dep(h), 0.32, train=self.train)),
F.relu(F.dropout(self.linear_head(h), 0.32, train=self.train))) for h in hs]
return cat_ys, dep_ys
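
# A toy version of the character-CNN trick in forward() above, with an assumed
# kernel size and padding (not the original hyper-parameters): fold (batch,
# length) into one axis, convolve over characters, then max-pool each word
# down to a single char_dim vector.
import numpy as np
import chainer.functions as F
import chainer.links as L

B, T, W, E, D = 2, 4, 10, 50, 30        # batch, words, chars/word, char emb, char_dim
conv = L.Convolution2D(1, D, ksize=(3, E), pad=(1, 0))
cs = np.random.randn(B * T, 1, W, E).astype(np.float32)
h = conv(cs)                            # (B*T, D, W, 1)
h = F.max_pooling_2d(h, (W, 1))         # (B*T, D, 1, 1): strongest position per filter
h = F.reshape(h, (B, T, D))             # back to (batch, length, char_dim)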
def getAllLSTMStates(self):
lstm_state_list_out = [0] * len(self) * 2
for z in six.moves.range(len(self)):
lstm_state_list_out[2 * z] = self[z].c
lstm_state_list_out[2 * z + 1] = self[z].h
    # For easier handling downstream, stack the state list into a single Chainer Variable
return chaFunc.stack(lstm_state_list_out)
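
# Round-trip sketch with made-up shapes: F.stack packs the [c, h] state pairs
# into one Variable that is easy to save or pass around, and plain indexing
# recovers the per-layer states.
import numpy as np
import chainer.functions as F

n_layers, B, H = 2, 4, 8
states = [np.random.randn(B, H).astype(np.float32) for _ in range(2 * n_layers)]
packed = F.stack(states)        # (2 * n_layers, B, H)
c0, h0 = packed[0], packed[1]   # layer-0 cell and hidden state, each (B, H)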
# Load back the saved LSTM states (the inverse operation of getAllLSTMStates)
def __call__(self, hx, cx, xs, flag_train, args):
if hx is None:
hx = self.init_hx(xs)
if cx is None:
cx = self.init_hx(xs)
    # hx, cx: tensors of shape (n_layers, minibatch size, hidden dim)
    # xs: tensor of shape (seq length, minibatch size, dim)
    # Note: chaFunc.n_step_lstm() does not apply dropout after the last layer
if args.chainer_version_check[0] == 2:
hy, cy, ys = chaFunc.n_step_lstm(
self.n_layers, self.dropout_rate, hx, cx, self.ws, self.bs, xs)
else:
hy, cy, ys = chaFunc.n_step_lstm(
self.n_layers, self.dropout_rate, hx, cx, self.ws, self.bs, xs,
train=flag_train, use_cudnn=self.use_cudnn)
    # hy, cy have shape (n_layers, minibatch size, hidden dim)
    # ys comes back as a list with one element per input position,
    # each element of shape (minibatch size, hidden dim);
    # a list is awkward to handle downstream, so stack it into a single
    # chainer.Variable: a tensor of shape (seq length, minibatch size, hidden dim)
hlist = chaFunc.stack(ys)
return hy, cy, hlist
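
# The same list-in / stack-out pattern via the higher-level link
# chainer.links.NStepLSTM (a sketch, not the wrapper above). For equal-length
# sequences the returned list ys can be stacked directly.
import numpy as np
import chainer.functions as F
import chainer.links as L

lstm = L.NStepLSTM(2, 4, 5, 0.0)   # n_layers, in_size, out_size, dropout
xs = [np.random.randn(7, 4).astype(np.float32) for _ in range(3)]
hy, cy, ys = lstm(None, None, xs)  # ys: list of 3 arrays, each (7, 5)
hlist = F.stack(ys)                # (3, 7, 5); only valid for equal lengths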
def read(self, h):
#M_key = F.swapaxes(F.stack(self.key_buff, axis=0), axis1=0, axis2=1) # (B, M, m)
M_key = F.stack(self.key_buff, axis=1) # (B, M, m)
self.p = F.softmax(F.reshape(F.batch_matmul(M_key, h, transa=False, transb=False), (h.shape[0], M_key.shape[1]))) # (B, M)
#p = F.reshape(p, (h.shape[0], 1, M_key.shape[1])) # (B, 1, M)
#print("p", p.shape)
#M_val = F.swapaxes(F.stack(self.val_buff, axis=0), axis1=0, axis2=1) # (B, M, m)
M_val = F.stack(self.val_buff, axis=1) # (B, M, m)
#print("M_val", M_val.shape)
o = F.batch_matmul(self.p, M_val, transa=True, transb=False) # (B, 1, m)
o = F.reshape(o, (o.shape[0], o.shape[2])) # (B, m)
#print("o", o.shape)
return o, self.p
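
# Shape check with made-up sizes for the attention in read() above:
# F.batch_matmul treats a 2-d input as a batch of column vectors, so the key
# matrix (B, M, m) times the query (B, m) yields one score per memory slot.
import numpy as np
import chainer.functions as F

B, M, m = 2, 4, 3
M_key = np.random.randn(B, M, m).astype(np.float32)
M_val = np.random.randn(B, M, m).astype(np.float32)
h = np.random.randn(B, m).astype(np.float32)
p = F.softmax(F.reshape(F.batch_matmul(M_key, h), (B, M)))    # (B, M) weights
o = F.reshape(F.batch_matmul(p, M_val, transa=True), (B, m))  # weighted read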
def __call__(self, x, train=True):
h_x = self.embed(x)
h_x = [h for h in h_x]
self.nstep_lstm.reset_state()
h_x = self.nstep_lstm(h_x, train)
h_x = [h[-1] for h in h_x]
h_x = F.stack(h_x, 0)
return self.l1(F.dropout(h_x, train=train))
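
# Sketch of the sentence-feature extraction above (hypothetical shapes): take
# the final time step of each sequence's LSTM output and stack them into a
# (batch, hidden) matrix; the sequences themselves may differ in length.
import numpy as np
import chainer.functions as F

ys = [np.random.randn(5, 8).astype(np.float32),  # (length_i, hidden)
      np.random.randn(7, 8).astype(np.float32)]
last = [y[-1] for y in ys]                       # last hidden state per sequence
feats = F.stack(last, 0)                         # (2, 8)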
def __call__(self, x):
# Obtain parameters for q(z|x)
encoding_time = time.time()
self.encode(x)
encoding_time = float(time.time() - encoding_time)
decoding_time_average = 0.
xp = cuda.cupy
self.importance_weights = 0
self.w_holder = []
self.kl = 0
self.logp = 0
for j in xrange(self.num_zsamples):
# Sample z ~ q(z|x)
z = F.gaussian(self.qmu, self.qln_var)
# Compute log q(z|x)
encoder_log = gaussian_logp(z, self.qmu, self.qln_var)
# Obtain parameters for p(x|z)
decoding_time = time.time()
self.decode(z)
decoding_time = time.time() - decoding_time
decoding_time_average += decoding_time
# Compute log p(x|z)
decoder_log = bernoulli_logp(x, self.p_ber_prob_logit)
# Compute log p(z).
prior_log = gaussian_logp0(z)
        # Store the latest log importance weight w'
current_temperature = min(self.temperature['value'],1.0)
self.w_holder.append(decoder_log + current_temperature*(prior_log - encoder_log))
# Store the KL and Logp equivalents. They are not used for computation but for recording and reporting.
self.kl += (encoder_log-prior_log)
self.logp += (decoder_log)
self.temperature['value'] += self.temperature['increment']
    # Combine the K per-sample log weights w' into the batch objective
logps = F.stack(self.w_holder)
self.obj_batch = F.logsumexp(logps, axis=0) - np.log(self.num_zsamples)
self.kl /= self.num_zsamples
self.logp /= self.num_zsamples
decoding_time_average /= self.num_zsamples
batch_size = self.obj_batch.shape[0]
self.obj = -F.sum(self.obj_batch)/batch_size
self.timing_info = np.array([encoding_time,decoding_time_average])
return self.obj
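
# The importance-weighted bound above in isolation, with random stand-ins for
# the K per-sample log weights w': logsumexp over the sample axis minus log K
# gives the per-example objective, and its negative batch mean is the loss.
import numpy as np
import chainer.functions as F

K, B = 5, 8
w_holder = [np.random.randn(B).astype(np.float32) for _ in range(K)]
obj_batch = F.logsumexp(F.stack(w_holder), axis=0) - np.log(K)  # (B,)
loss = -F.sum(obj_batch) / B                                    # scalar to minimize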
def __call__(self, x):
# Obtain parameters for q(z|x)
encoding_time = time.time()
self.encode(x)
encoding_time = float(time.time() - encoding_time)
decoding_time_average = 0.
xp = cuda.cupy
self.importance_weights = 0
self.w_holder = []
self.kl = 0
self.logp = 0
for j in xrange(self.num_zsamples):
# Sample z ~ q(z|x)
z = F.gaussian(self.qmu, self.qln_var)
# Compute log q(z|x)
encoder_log = gaussian_logp(z, self.qmu, self.qln_var)
# Obtain parameters for p(x|z)
decoding_time = time.time()
self.decode(z)
decoding_time = time.time() - decoding_time
decoding_time_average += decoding_time
# Compute log p(x|z)
decoder_log = gaussian_logp(x, self.pmu, self.pln_var)
        # Compute log p(z) under the standard normal prior (mean 0, ln_var 0, i.e. unit variance)
        prior_log = gaussian_logp(z, self.qmu * 0, self.qln_var * 0)
        # Store the latest log importance weight w'
current_temperature = min(self.temperature['value'],1.0)
self.w_holder.append(decoder_log + current_temperature*(prior_log - encoder_log))
# Store the KL and Logp equivalents. They are not used for computation but for recording and reporting.
self.kl += (encoder_log-prior_log)
self.logp += (decoder_log)
self.temperature['value'] += self.temperature['increment']
    # Combine the K per-sample log weights w' into the batch objective
logps = F.stack(self.w_holder)
self.obj_batch = F.logsumexp(logps, axis=0) - np.log(self.num_zsamples)
self.kl /= self.num_zsamples
self.logp /= self.num_zsamples
decoding_time_average /= self.num_zsamples
batch_size = self.obj_batch.shape[0]
self.obj = -F.sum(self.obj_batch)/batch_size
self.timing_info = np.array([encoding_time,decoding_time_average])
return self.obj
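
# For reference, a self-contained stand-in for the gaussian_logp helper that
# the snippets above assume (its real definition is not shown here): the
# diagonal-Gaussian log-density parameterized by log-variance, summed per row.
import math
import chainer.functions as F

def gaussian_logp(x, mu, ln_var):
    # sum over the feature axis of log N(x_i; mu_i, exp(ln_var_i))
    return F.sum(
        -0.5 * (math.log(2 * math.pi) + ln_var + (x - mu) ** 2 * F.exp(-ln_var)),
        axis=1)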
def beam_search(dec, state, y, data, beam_width, mydict_inv):
    xp = cuda.cupy
batchsize=data.shape[0]
vocab_size=len(mydict_inv)
topk=20
route = np.zeros((batchsize,beam_width,50)).astype(np.int32)
for j in range(50):
if j == 0:
y = Variable(xp.array(np.argmax(y.data.get(), axis=1)).astype(xp.int32))
state,y = dec(y, state, train=False)
h=state['h1'].data
c=state['c1'].data
h=xp.tile(h.reshape(batchsize,1,-1), (1,beam_width,1))
c=xp.tile(c.reshape(batchsize,1,-1), (1,beam_width,1))
ptr=F.log_softmax(y).data.get()
pred_total_city = np.argsort(ptr)[:,::-1][:,:beam_width]
pred_total_score = np.sort(ptr)[:,::-1][:,:beam_width]
route[:,:,j] = pred_total_city
pred_total_city=pred_total_city.reshape(batchsize,beam_width,1)
else:
pred_next_score=np.zeros((batchsize,beam_width,topk))
pred_next_city=np.zeros((batchsize,beam_width,topk)).astype(np.int32)
score2idx=np.zeros((batchsize,beam_width,topk)).astype(np.int32)
for b in range(beam_width):
state={'c1':Variable(c[:,b,:]), 'h1':Variable(h[:,b,:])}
cur_city = xp.array([pred_total_city[i,b,j-1] for i in range(batchsize)]).astype(xp.int32)
state,y = dec(cur_city,state, train=False)
h[:,b,:]=state['h1'].data
c[:,b,:]=state['c1'].data
ptr=F.log_softmax(y).data.get()
pred_next_score[:,b,:]=np.sort(ptr, axis=1)[:,::-1][:,:topk]
pred_next_city[:,b,:]=np.argsort(ptr, axis=1)[:,::-1][:,:topk]
h=F.stack([h for i in range(topk)], axis=2).data
c=F.stack([c for i in range(topk)], axis=2).data
pred_total_city = np.tile(route[:,:,:j],(1,1,topk)).reshape(batchsize,beam_width,topk,j)
pred_next_city = pred_next_city.reshape(batchsize,beam_width,topk,1)
pred_total_city = np.concatenate((pred_total_city,pred_next_city),axis=3)
pred_total_score = np.tile(pred_total_score.reshape(batchsize,beam_width,1),(1,1,topk)).reshape(batchsize,beam_width,topk,1)
pred_next_score = pred_next_score.reshape(batchsize,beam_width,topk,1)
pred_total_score += pred_next_score
idx = pred_total_score.reshape(batchsize,beam_width * topk).argsort(axis=1)[:,::-1][:,:beam_width]
pred_total_city = pred_total_city[:,idx//topk, np.mod(idx,topk), :][np.diag_indices(batchsize,ndim=2)].reshape(batchsize,beam_width,j+1)
pred_total_score = pred_total_score[:,idx//topk, np.mod(idx,topk), :][np.diag_indices(batchsize,ndim=2)].reshape(batchsize,beam_width,1)
h = h[:,idx//topk, np.mod(idx,topk), :][np.diag_indices(batchsize,ndim=2)].reshape(batchsize,beam_width,-1)
c = c[:,idx//topk, np.mod(idx,topk), :][np.diag_indices(batchsize,ndim=2)].reshape(batchsize,beam_width,-1)
route[:,:,:j+1] =pred_total_city
if (pred_total_city[:,:,j] == 15).all():
break
return route[:,0,:j+1].tolist()
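
# Core of the beam pruning above, in plain numpy with made-up sizes: flatten
# the beam_width x topk score grid, keep the top beam_width entries per batch
# row, then recover which parent beam and which candidate each survivor is.
import numpy as np

B, W, K = 2, 3, 4
scores = np.random.randn(B, W, K)
idx = scores.reshape(B, W * K).argsort(axis=1)[:, ::-1][:, :W]
beam_idx, cand_idx = idx // K, idx % K  # parent beam / candidate id, each (B, W)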
def encodeSentenceFWD(self, train_mode, sentence, args, dropout_rate):
    if args.gpu_enc != args.gpu_dec:  # if encoder and decoder sit on different GPUs
        chainer.cuda.get_device(args.gpu_enc).use()
    encLen = len(sentence)  # sequence length
    cMBSize = len(sentence[0])  # minibatch size
    # Look up the embeddings for the whole input sentence at once (more efficient)
    encEmbList = self.getEncoderInputEmbeddings(sentence, args)
    flag_train = (train_mode > 0)
    lstmVars = [0] * self.n_layers * 2
    if self.flag_merge_encfwbw == 0:  # run the fw and bw LSTM stacks independently
        hyf, cyf, fwHout = self.model.encLSTM_f(
            None, None, encEmbList, flag_train, args)  # forward direction
        hyb, cyb, bkHout = self.model.encLSTM_b(
            None, None, encEmbList[::-1], flag_train, args)  # backward direction
        for z in six.moves.range(self.n_layers):
            lstmVars[2 * z] = cyf[z] + cyb[z]
            lstmVars[2 * z + 1] = hyf[z] + hyb[z]
    elif self.flag_merge_encfwbw == 1:  # merge fw and bw outputs at every layer
        sp = (cMBSize, self.hDim)
        for z in six.moves.range(self.n_layers):
            if z == 0:  # the first layer reads the embeddings
                biH = encEmbList
            else:  # deeper layers read the previous layer's output
                # bkHout is in reverse order, so flip it before adding
                biH = fwHout + bkHout[::-1]
            # forward pass for layer z
            hyf, cyf, fwHout = self.model.encLSTM_f(
                z, biH, flag_train, dropout_rate, args)
            # backward pass for layer z
            hyb, cyb, bkHout = self.model.encLSTM_b(
                z, biH[::-1], flag_train, dropout_rate, args)
            # merge this layer's final cell/hidden states and keep them
            # (reshaped) for initializing the decoder
            lstmVars[2 * z] = chaFunc.reshape(cyf + cyb, sp)
            lstmVars[2 * z + 1] = chaFunc.reshape(hyf + hyb, sp)
    else:
        assert 0, "ERROR"
    # final hidden layer
    if self.flag_enc_boseos == 0:  # default
        # fwHout needs the [:, ] slice here to avoid an error
        biHiddenStack = fwHout[:, ] + bkHout[::-1]
    elif self.flag_enc_boseos == 1:
        bkHout2 = bkHout[::-1]  # reversed order
        biHiddenStack = fwHout[1:encLen - 1, ] + bkHout2[1:encLen - 1, ]
        # drop the BOS and EOS positions  TODO: should these be zero-masked instead?
        encLen -= 2
    else:
        assert 0, "ERROR"
    # swap (enc length, minibatch size, hidden dim)
    # => (minibatch size, enc length, hidden dim)
    biHiddenStackSW01 = chaFunc.swapaxes(biHiddenStack, 0, 1)
    # stack the per-layer LSTM states so the decoder LSTM can be initialized with them
    lstmVars = chaFunc.stack(lstmVars)
    # bundle the encoder results into an encInfoObject and return it
    retO = self.encInfoObject(biHiddenStackSW01, lstmVars, encLen, cMBSize)
    return retO
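
# Shape sketch with made-up sizes for the two packing steps at the end of
# encodeSentenceFWD: swapaxes turns the time-major encoder outputs batch-major
# for attention, and F.stack packs the per-layer states for the decoder.
import numpy as np
import chainer.functions as F

encLen, B, H, n_layers = 7, 4, 16, 2
biHiddenStack = np.random.randn(encLen, B, H).astype(np.float32)
biHiddenStackSW01 = F.swapaxes(biHiddenStack, 0, 1)  # (B, encLen, H)
lstmVars = [np.random.randn(B, H).astype(np.float32) for _ in range(2 * n_layers)]
lstmVars = F.stack(lstmVars)                         # (2 * n_layers, B, H)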