def sample(self, input, temperature=1., hidden=None):
    hidden = self.module_.init_hidden(1) if hidden is None else hidden
    output, hidden = self.module_(input, hidden)
    # temperature-scaled, unnormalized distribution over the vocabulary
    probas = output.squeeze().data.div(temperature).exp()
    sample = torch.multinomial(probas, 1)[-1]
    if probas.dim() > 1:
        sample = sample[0]
    return sample, self.repackage_hidden(hidden)
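
# A minimal, self-contained sketch of the sampling pattern above (assumed
# names, modern PyTorch): logits are divided by a temperature and then
# exponentiated, which yields the non-negative weights torch.multinomial
# expects -- it does not require them to sum to 1.
import torch

logits = torch.randn(10)                    # fake output over a 10-symbol vocabulary
temperature = 0.8                           # <1 sharpens, >1 flattens the distribution
weights = logits.div(temperature).exp()     # unnormalized multinomial weights
idx = torch.multinomial(weights, 1).item()  # index of the sampled symbol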
def generate(decoder, prime_str='A', predict_len=100, temperature=0.8, cuda=False):
    hidden = decoder.init_hidden(1)
    prime_input = torch.autograd.Variable(char_tensor(prime_str).unsqueeze(0))

    if cuda:
        hidden = hidden.cuda()
        prime_input = prime_input.cuda()
    predicted = prime_str

    # Use priming string to "build up" hidden state
    for p in range(len(prime_str) - 1):
        _, hidden = decoder(prime_input[:, p], hidden)
    inp = prime_input[:, -1]

    for p in range(predict_len):
        output, hidden = decoder(inp, hidden)

        # Sample from the network as a multinomial distribution
        output_dist = output.data.view(-1).div(temperature).exp()
        top_i = torch.multinomial(output_dist, 1)[0]

        # Add predicted character to string and use as next input
        predicted_char = chars[top_i]
        predicted += predicted_char
        inp = torch.autograd.Variable(char_tensor(predicted_char).unsqueeze(0))
        if cuda:
            inp = inp.cuda()

    return predicted
# Run as standalone script
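
# Hypothetical usage of generate() above, assuming a trained `decoder` and the
# char-RNN helpers (`char_tensor`, `chars`) from the rest of this script:
#
#   print(generate(decoder, prime_str='Th', predict_len=200, temperature=0.5))
#
# Lower temperatures push the multinomial sampling toward argmax decoding;
# higher temperatures yield more varied but noisier text.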
def vis_generate(decoder, input_str, temperature=0.8):
    hidden = decoder.init_hidden(1)
    test_len = len(input_str)
    prime_input = torch.autograd.Variable(char_tensor(input_str[0]).unsqueeze(0))

    for p in range(len(prime_input) - 1):
        _, hidden = decoder(prime_input[:, p], hidden)
    hidden_matrix = np.copy(hidden.unsqueeze(0).data.numpy())
    hidden_matrix = hidden_matrix.reshape((1, hidden_matrix.size))
    inp = prime_input[:, -1]

    for p in range(1, test_len):
        output, hidden = decoder(inp, hidden)
        hidden_matrix = np.vstack((hidden_matrix, hidden[0, 0, :].data.numpy()))
        # print(hidden[0, 0, :].data.numpy())

        # Sample from the network as a multinomial distribution
        # output_dist = output.data.view(-1).div(temperature).exp()
        # top_i = torch.multinomial(output_dist, 1)[0]
        # predicted_char = chars[top_i]
        inp = torch.autograd.Variable(char_tensor(input_str[p]).unsqueeze(0))

    hidden_matrix = np.delete(hidden_matrix, 0, 0)
    df = pd.DataFrame(hidden_matrix)
    df.to_csv('paren-data-df.csv')
    np.savetxt("paren-data.csv", hidden_matrix, delimiter=",")
    np.savetxt("paren-data.tsv", hidden_matrix, delimiter="\t")
# Run as standalone script
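
# Hypothetical follow-up for the dumps written above: each row of the saved
# matrix is the RNN hidden state after reading one character of `input_str`,
# so it can be reloaded for visualization, e.g.
#
#   import numpy as np
#   hidden_matrix = np.loadtxt("paren-data.csv", delimiter=",")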
def write_batch(self, bsz, lang_h, ctx_h, temperature, max_words=100):
    """Generate sentences for a whole batch simultaneously."""
    eod = self.word_dict.get_idx('<selection>')

    # resize the language hidden and context hidden states
    lang_h = lang_h.squeeze(0).expand(bsz, lang_h.size(2))
    ctx_h = ctx_h.squeeze(0).expand(bsz, ctx_h.size(2))

    # start the conversation with 'YOU:'
    inpt = torch.LongTensor(bsz).fill_(self.word_dict.get_idx('YOU:'))
    inpt = Variable(self.to_device(inpt))

    outs, lang_hs = [], [lang_h.unsqueeze(0)]
    done = set()

    # generate until max_words words are produced, or all the dialogues are done
    for _ in range(max_words):
        # embed the input
        inpt_emb = torch.cat([self.word_encoder(inpt), ctx_h], 1)
        # pass it through the writer and get the new hidden state
        lang_h = self.writer(inpt_emb, lang_h)
        out = self.decoder(lang_h)
        # tie weights with the encoder
        scores = F.linear(out, self.word_encoder.weight).div(temperature)
        # subtract the max to make the softmax numerically stable
        scores.sub_(scores.max(1, keepdim=True)[0].expand(scores.size(0), scores.size(1)))
        out = torch.multinomial(scores.exp(), 1).squeeze(1)

        # save outputs and hidden states
        outs.append(out.unsqueeze(0))
        lang_hs.append(lang_h.unsqueeze(0))
        inpt = out

        data = out.data.cpu()
        # check if all the dialogues in the batch are done
        for i in range(bsz):
            if data[i] == eod:
                done.add(i)
        if len(done) == bsz:
            break

    # run the writer on the last word to get the correct final hidden state
    inpt_emb = torch.cat([self.word_encoder(inpt), ctx_h], 1)
    lang_h = self.writer(inpt_emb, lang_h)
    lang_hs.append(lang_h.unsqueeze(0))

    # concatenate outputs and hidden states into single tensors
    return torch.cat(outs, 0), torch.cat(lang_hs, 0)
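
# The batched sampling step above in isolation (a sketch with illustrative
# shapes): torch.multinomial on a 2-D tensor draws independently per row, so
# one call samples a token for every dialogue in the batch.
import torch

scores = torch.randn(4, 7)                        # bsz=4, vocab=7 fake decoder scores
scores = scores.div(0.5)                          # temperature 0.5
scores = scores - scores.max(1, keepdim=True)[0]  # stabilize the exp()
tokens = torch.multinomial(scores.exp(), 1).squeeze(1)  # LongTensor of shape (4,)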
def show_examples_pytorch(model, es, rlut1, rlut2, embed2, mxlen, sample, prob_clip, max_examples, reverse):
    si = np.random.randint(0, len(es))

    batch_dict = es[si]
    src_array = batch_dict['src']
    tgt_array = batch_dict['dst']
    src_len = batch_dict['src_len']
    #src_array, tgt_array, src_len, _ = es[si]

    if max_examples > 0:
        max_examples = min(max_examples, src_array.size(0))
        src_array = src_array[0:max_examples]
        tgt_array = tgt_array[0:max_examples]
        src_len = src_len[0:max_examples]

    GO = embed2.vocab['<GO>']
    EOS = embed2.vocab['<EOS>']

    # TODO: fix this, check for GPU first
    src_array = src_array.cuda()

    for src_len, src_i, tgt_i in zip(src_len, src_array, tgt_array):
        print('========================================================================')
        sent = lookup_sentence(rlut1, src_i.cpu().numpy(), reverse=reverse)
        print('[OP] %s' % sent)
        sent = lookup_sentence(rlut2, tgt_i)
        print('[Actual] %s' % sent)
        dst_i = torch.zeros(1, mxlen).long()
        #if use_gpu:
        dst_i = dst_i.cuda()

        next_value = GO
        src_i = src_i.view(1, -1)
        for j in range(mxlen):
            dst_i[0, j] = next_value
            probv = model((torch.autograd.Variable(src_i), torch.autograd.Variable(dst_i)))
            output = probv.squeeze()[j]
            if sample is False:
                _, next_value = torch.max(output, 0)
                next_value = int(next_value.data[0])
            else:
                probs = output.data.exp()
                # zero out low-probability events so they are never sampled
                best, ids = probs.topk(prob_clip, 0, largest=True, sorted=True)
                probs.zero_()
                probs.index_copy_(0, ids, best)
                probs.div_(torch.sum(probs))
                fv = torch.multinomial(probs, 1)[0]
                next_value = fv

            if next_value == EOS:
                break

        sent = lookup_sentence(rlut2, dst_i.squeeze())
        print('Guess: %s' % sent)
        print('------------------------------------------------------------------------')
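
# The prob_clip (top-k) trick above as a standalone sketch: zero out all but
# the k most probable entries, renormalize, then sample.
import torch

probs = torch.tensor([0.50, 0.20, 0.15, 0.10, 0.05])
k = 3
best, ids = probs.topk(k, 0, largest=True, sorted=True)
clipped = torch.zeros_like(probs)
clipped.index_copy_(0, ids, best)   # keep only the top-k mass
clipped.div_(clipped.sum())         # renormalize over the survivors
next_value = torch.multinomial(clipped, 1).item()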
def sample(self, fc_feats, att_feats, opt={}):
    sample_max = opt.get('sample_max', 1)
    beam_size = opt.get('beam_size', 1)
    temperature = opt.get('temperature', 1.0)
    if beam_size > 1:
        return self.sample_beam(fc_feats, att_feats, opt)

    batch_size = fc_feats.size(0)
    state = self.init_hidden(batch_size)
    seq = []
    seqLogprobs = []
    for t in range(self.seq_length + 2):
        if t == 0:
            xt = self.img_embed(fc_feats)
        else:
            if t == 1:  # input <bos>
                it = fc_feats.data.new(batch_size).long().zero_()
            elif sample_max:
                sampleLogprobs, it = torch.max(logprobs.data, 1)
                it = it.view(-1).long()
            else:
                if temperature == 1.0:
                    prob_prev = torch.exp(logprobs.data).cpu()  # fetch prev distribution: shape Nx(M+1)
                else:
                    # scale logprobs by temperature
                    prob_prev = torch.exp(torch.div(logprobs.data, temperature)).cpu()
                it = torch.multinomial(prob_prev, 1).cuda()
                sampleLogprobs = logprobs.gather(1, Variable(it, requires_grad=False))  # gather the logprobs at sampled positions
                it = it.view(-1).long()  # and flatten indices for downstream processing
            xt = self.embed(Variable(it, requires_grad=False))

        if t >= 2:
            # stop when all sequences have finished
            if t == 2:
                unfinished = it > 0
            else:
                unfinished = unfinished * (it > 0)
            if unfinished.sum() == 0:
                break
            it = it * unfinished.type_as(it)
            seq.append(it)  # seq[t] is the input at time step t+2
            seqLogprobs.append(sampleLogprobs.view(-1))

        output, state = self.core(xt, state)
        logprobs = F.log_softmax(self.logit(output))

    return torch.cat([_.unsqueeze(1) for _ in seq], 1), torch.cat([_.unsqueeze(1) for _ in seqLogprobs], 1)
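
# The two decoding modes above side by side (a sketch with fake log-probs,
# modern PyTorch, so no Variable wrapper is needed):
import torch

logprobs = torch.log_softmax(torch.randn(2, 5), dim=1)  # batch of 2, vocab of 5
# greedy (sample_max=1): argmax and its log-probability
greedy_logprobs, it_greedy = torch.max(logprobs, 1)
# stochastic (sample_max=0): exponentiate, optionally temperature-scale, sample
prob_prev = torch.exp(logprobs / 0.7)
it = torch.multinomial(prob_prev, 1)      # shape (2, 1)
sample_logprobs = logprobs.gather(1, it)  # log-probs at the sampled indices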
def sample(self, fc_feats, att_feats, opt={}):
    sample_max = opt.get('sample_max', 1)
    beam_size = opt.get('beam_size', 1)
    temperature = opt.get('temperature', 1.0)
    if beam_size > 1:
        return self.sample_beam(fc_feats, att_feats, opt)

    batch_size = fc_feats.size(0)
    state = self.init_hidden(fc_feats)
    seq = []
    seqLogprobs = []
    for t in range(self.seq_length + 1):
        if t == 0:  # input <bos>
            it = fc_feats.data.new(batch_size).long().zero_()
        elif sample_max:
            sampleLogprobs, it = torch.max(logprobs.data, 1)
            it = it.view(-1).long()
        else:
            if temperature == 1.0:
                prob_prev = torch.exp(logprobs.data).cpu()  # fetch prev distribution: shape Nx(M+1)
            else:
                # scale logprobs by temperature
                prob_prev = torch.exp(torch.div(logprobs.data, temperature)).cpu()
            it = torch.multinomial(prob_prev, 1).cuda()
            sampleLogprobs = logprobs.gather(1, Variable(it, requires_grad=False))  # gather the logprobs at sampled positions
            it = it.view(-1).long()  # and flatten indices for downstream processing
        xt = self.embed(Variable(it, requires_grad=False))

        if t >= 1:
            # stop when all sequences have finished
            if t == 1:
                unfinished = it > 0
            else:
                unfinished = unfinished * (it > 0)
            if unfinished.sum() == 0:
                break
            it = it * unfinished.type_as(it)
            seq.append(it)  # record the word fed in at this step
            seqLogprobs.append(sampleLogprobs.view(-1))

        output, state = self.core(xt, fc_feats, att_feats, state)
        logprobs = F.log_softmax(self.logit(self.dropout(output)))

    return torch.cat([_.unsqueeze(1) for _ in seq], 1), torch.cat([_.unsqueeze(1) for _ in seqLogprobs], 1)
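
# The early-stopping mask above, in isolation: token 0 marks a finished
# sequence, and once finished a sequence stays masked out.
import torch

it = torch.tensor([3, 0, 5])      # step t: the second sequence just ended
unfinished = it > 0               # tensor([True, False, True])
it = it * unfinished.type_as(it)  # zeros out tokens of finished sequences
# on later steps the mask only shrinks: unfinished = unfinished * (it > 0)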
def forward(self, fc_feats, att_feats, seq):
    batch_size = fc_feats.size(0)
    state = self.init_hidden(batch_size)
    outputs = []

    # embed fc and att feats
    fc_feats = self.fc_embed(fc_feats)
    _att_feats = self.att_embed(att_feats.view(-1, self.att_feat_size))
    att_feats = _att_feats.view(*(att_feats.size()[:-1] + (self.rnn_size,)))

    # Project the attention feats first to reduce memory and computation consumption.
    p_att_feats = self.ctx2att(att_feats.view(-1, self.rnn_size))
    p_att_feats = p_att_feats.view(*(att_feats.size()[:-1] + (self.att_hid_size,)))

    for i in range(seq.size(1) - 1):
        if self.training and i >= 1 and self.ss_prob > 0.0:  # otherwise no need to sample
            sample_prob = fc_feats.data.new(batch_size).uniform_(0, 1)
            sample_mask = sample_prob < self.ss_prob
            if sample_mask.sum() == 0:
                it = seq[:, i].clone()
            else:
                sample_ind = sample_mask.nonzero().view(-1)
                it = seq[:, i].data.clone()
                #prob_prev = torch.exp(outputs[-1].data.index_select(0, sample_ind)) # fetch prev distribution: shape Nx(M+1)
                #it.index_copy_(0, sample_ind, torch.multinomial(prob_prev, 1).view(-1))
                prob_prev = torch.exp(outputs[-1].data)  # fetch prev distribution: shape Nx(M+1)
                it.index_copy_(0, sample_ind, torch.multinomial(prob_prev, 1).view(-1).index_select(0, sample_ind))
                it = Variable(it, requires_grad=False)
        else:
            it = seq[:, i].clone()
        # break if all the sequences end
        if i >= 1 and seq[:, i].data.sum() == 0:
            break

        xt = self.embed(it)

        output, state = self.core(xt, fc_feats, att_feats, p_att_feats, state)
        output = F.log_softmax(self.logit(output))
        outputs.append(output)

    return torch.cat([_.unsqueeze(1) for _ in outputs], 1)
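
# Scheduled sampling in isolation (a sketch; names are illustrative): with
# probability ss_prob per sequence, the ground-truth token is replaced by a
# token sampled from the model's previous-step distribution.
import torch

batch_size, vocab = 4, 6
ss_prob = 0.25
gt = torch.randint(1, vocab, (batch_size,))  # ground-truth tokens
logprobs = torch.log_softmax(torch.randn(batch_size, vocab), dim=1)

sample_mask = torch.rand(batch_size) < ss_prob
it = gt.clone()
if sample_mask.any():
    sample_ind = sample_mask.nonzero().view(-1)
    prob_prev = torch.exp(logprobs)          # previous-step distribution
    sampled = torch.multinomial(prob_prev, 1).view(-1)
    it.index_copy_(0, sample_ind, sampled.index_select(0, sample_ind))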
def sample(self, fc_feats, att_feats, opt={}):
    sample_max = opt.get('sample_max', 1)
    beam_size = opt.get('beam_size', 1)
    temperature = opt.get('temperature', 1.0)
    if beam_size > 1:
        return self.sample_beam(fc_feats, att_feats, opt)

    batch_size = fc_feats.size(0)
    state = self.init_hidden(batch_size)
    seq = []
    seqLogprobs = []
    for t in range(self.seq_length + 2):
        if t == 0:
            xt = self.img_embed(fc_feats)
        else:
            if t == 1:  # input <bos>
                it = fc_feats.data.new(batch_size).long().zero_()
            elif sample_max:
                sampleLogprobs, it = torch.max(logprobs.data, 1)
                it = it.view(-1).long()
            else:
                if temperature == 1.0:
                    prob_prev = torch.exp(logprobs.data).cpu()  # fetch prev distribution: shape Nx(M+1)
                else:
                    # scale logprobs by temperature
                    prob_prev = torch.exp(torch.div(logprobs.data, temperature)).cpu()
                it = torch.multinomial(prob_prev, 1).cuda()
                sampleLogprobs = logprobs.gather(1, Variable(it, requires_grad=False))  # gather the logprobs at sampled positions
                it = it.view(-1).long()  # and flatten indices for downstream processing
            xt = self.embed(Variable(it, requires_grad=False))

        if t >= 2:
            # stop when all sequences have finished
            if t == 2:
                unfinished = it > 0
            else:
                unfinished = unfinished * (it > 0)
            if unfinished.sum() == 0:
                break
            it = it * unfinished.type_as(it)
            seq.append(it)  # seq[t] is the input at time step t+2
            seqLogprobs.append(sampleLogprobs.view(-1))

        output, state = self.core(xt.unsqueeze(0), state)
        logprobs = F.log_softmax(self.logit(self.dropout(output.squeeze(0))))

    return torch.cat([_.unsqueeze(1) for _ in seq], 1), torch.cat([_.unsqueeze(1) for _ in seqLogprobs], 1)
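
# Note on the .cpu()/.cuda() round-trip in the snippets above: it is likely a
# workaround for multinomial sampling issues in older CUDA builds of PyTorch.
# Modern PyTorch can sample directly from CUDA tensors, e.g.
#
#   it = torch.multinomial(prob_prev, 1)   # prob_prev may stay on the GPU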
def sample(self, fc_feats, att_feats, opt={}):
    sample_max = opt.get('sample_max', 1)
    beam_size = opt.get('beam_size', 1)
    temperature = opt.get('temperature', 1.0)
    if beam_size > 1:
        return self.sample_beam(fc_feats, att_feats, opt)

    batch_size = fc_feats.size(0)
    state = self.init_hidden(batch_size)

    # Project the attention feats first to reduce memory and computation consumption.
    p_att_feats = self.ctx2att(att_feats.view(-1, self.att_feat_size))
    p_att_feats = p_att_feats.view(*(att_feats.size()[:-1] + (self.att_hid_size,)))

    seq = []
    seqLogprobs = []
    for t in range(self.seq_length + 1):
        if t == 0:  # input <bos>
            it = fc_feats.data.new(batch_size).long().zero_()
        elif sample_max:
            sampleLogprobs, it = torch.max(logprobs.data, 1)
            it = it.view(-1).long()
        else:
            if temperature == 1.0:
                prob_prev = torch.exp(logprobs.data).cpu()  # fetch prev distribution: shape Nx(M+1)
            else:
                # scale logprobs by temperature
                prob_prev = torch.exp(torch.div(logprobs.data, temperature)).cpu()
            it = torch.multinomial(prob_prev, 1).cuda()
            sampleLogprobs = logprobs.gather(1, Variable(it, requires_grad=False))  # gather the logprobs at sampled positions
            it = it.view(-1).long()  # and flatten indices for downstream processing
        xt = self.embed(Variable(it, requires_grad=False))

        if t >= 1:
            # stop when all sequences have finished
            if t == 1:
                unfinished = it > 0
            else:
                unfinished = unfinished * (it > 0)
            if unfinished.sum() == 0:
                break
            it = it * unfinished.type_as(it)
            seq.append(it)  # record the word fed in at this step
            seqLogprobs.append(sampleLogprobs.view(-1))

        output, state = self.core(xt, fc_feats, att_feats, p_att_feats, state)
        logprobs = F.log_softmax(self.logit(output))

    return torch.cat([_.unsqueeze(1) for _ in seq], 1), torch.cat([_.unsqueeze(1) for _ in seqLogprobs], 1)
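
# Hypothetical call pattern for the sample() methods above (argument names
# come from the snippets; the model and features themselves are assumed):
#
#   seq, seqLogprobs = model.sample(fc_feats, att_feats,
#                                   opt={'sample_max': 0, 'temperature': 0.7})
#
# With sample_max=1 (the default), decoding is greedy and temperature is
# ignored; with sample_max=0, tokens are drawn via torch.multinomial.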