def translate(self, xs, max_length=100):
    """Greedily decode a batch of source sequences.

    Every source sequence is reversed, embedded with ``self.embed_x`` and
    passed through ``self.encoder``.  The decoder is then stepped
    ``max_length`` times; each step is fed the arg-max tokens of the
    previous step, starting from a batch filled with ``EOS``.

    Args:
        xs: Batch of source sequences; ``len(xs)`` is used as the batch
            size and every element must support ``[::-1]``.
        max_length: Number of decoding steps to run.

    Returns:
        A list with one array per decoded row, truncated just before the
        first ``EOS`` it contains (left untouched when it has none).
    """
    n_seqs = len(xs)
    with chainer.no_backprop_mode(), chainer.using_config('train', False):
        reversed_xs = [x[::-1] for x in xs]
        state, _ = self.encoder(
            None, sequence_embed(self.embed_x, reversed_xs))
        tokens = self.xp.full(n_seqs, EOS, 'i')
        steps = []
        for _step in range(max_length):
            step_inputs = chainer.functions.split_axis(
                self.embed_y(tokens), n_seqs, 0)
            state, step_outputs = self.decoder(state, step_inputs)
            scores = self.W(chainer.functions.concat(step_outputs, axis=0))
            tokens = self.xp.argmax(scores.data, axis=1).astype('i')
            steps.append(tokens)

    # Stack the per-step token arrays and transpose so that iterating the
    # result yields one decoded sequence at a time.
    decoded = cuda.to_cpu(self.xp.stack(steps).T)

    # Cut every sequence at its first EOS.
    outs = []
    for row in decoded:
        eos_at = np.argwhere(row == EOS)
        outs.append(row[:eos_at[0, 0]] if len(eos_at) > 0 else row)
    return outs
# Python example source code for no_backprop_mode()
def translate(self, xs, max_length=100):
    """Greedily decode a batch using a forward and a backward encoder.

    The sources are embedded twice, once as given and once reversed, and
    fed to ``self.encoder_f`` and ``self.encoder_b`` respectively.  The two
    resulting states are concatenated along axis 2 and used as the initial
    decoder state.  Decoding then runs for ``max_length`` steps, feeding
    back the arg-max tokens of the previous step (starting from ``EOS``).

    Args:
        xs: Batch of source sequences; ``len(xs)`` is used as the batch
            size and every element must support ``[::-1]``.
        max_length: Number of decoding steps to run.

    Returns:
        A list with one array per decoded row, truncated just before the
        first ``EOS`` it contains (left untouched when it has none).
    """
    n_seqs = len(xs)
    with chainer.no_backprop_mode(), chainer.using_config('train', False):
        forward_xs = xs
        backward_xs = [x[::-1] for x in xs]
        forward_emb = sequence_embed(self.embed_x, forward_xs)
        backward_emb = sequence_embed(self.embed_x, backward_xs)
        fx, _ = self.encoder_f(None, forward_emb)
        bx, _ = self.encoder_b(None, backward_emb)
        state = F.concat([fx, bx], axis=2)

        tokens = self.xp.full(n_seqs, EOS, 'i')
        steps = []
        for _step in range(max_length):
            embedded = self.embed_y(tokens)
            step_inputs = chainer.functions.split_axis(embedded, n_seqs, 0)
            state, step_outputs = self.decoder(state, step_inputs)
            merged = chainer.functions.concat(step_outputs, axis=0)
            tokens = self.xp.argmax(self.W(merged).data, axis=1).astype('i')
            steps.append(tokens)

    # One row per decoded sequence after the transpose.
    decoded = cuda.to_cpu(self.xp.stack(steps).T)

    # Cut every sequence at its first EOS.
    outs = []
    for row in decoded:
        eos_at = np.argwhere(row == EOS)
        if len(eos_at) > 0:
            row = row[:eos_at[0, 0]]
        outs.append(row)
    return outs
def CalculateValLoss(self, xs, ys):
    """Return the raw value of ``self.CalcLoss(xs, ys)`` in evaluation mode.

    The loss is computed with back-propagation disabled and the ``train``
    configuration flag set to ``False``; its ``.data`` payload is returned.
    """
    with chainer.no_backprop_mode():
        with chainer.using_config('train', False):
            val_loss = self.CalcLoss(xs, ys)
            return val_loss.data
def translate(self, xs, max_length=100):
    """Greedily decode a batch with an encoder/decoder carrying ``h`` and ``c``.

    Sources are reversed, embedded and encoded; the resulting hidden and
    cell states seed the decoder, which is stepped ``max_length`` times.
    Each step consumes the arg-max tokens of the previous one, starting
    from a batch filled with ``EOS``.

    Args:
        xs: Batch of source sequences; ``len(xs)`` is used as the batch
            size and every element must support ``[::-1]``.
        max_length: Number of decoding steps to run.

    Returns:
        A list with one array per decoded row, truncated just before the
        first ``EOS`` it contains (left untouched when it has none).
    """
    n_seqs = len(xs)

    def _step_inputs(tokens):
        # Embed the current tokens and split them into per-sequence inputs.
        return chainer.functions.split_axis(self.embed_y(tokens), n_seqs, 0)

    def _greedy_tokens(step_outputs):
        # Project the decoder outputs and keep the best-scoring token ids.
        scores = self.W(chainer.functions.concat(step_outputs, axis=0))
        return self.xp.argmax(scores.data, axis=1).astype('i')

    with chainer.no_backprop_mode(), chainer.using_config('train', False):
        reversed_xs = [x[::-1] for x in xs]
        hidden, cell, _ = self.encoder(
            None, None, sequence_embed(self.embed_x, reversed_xs))
        tokens = self.xp.full(n_seqs, EOS, 'i')
        steps = []
        for _step in range(max_length):
            hidden, cell, step_outputs = self.decoder(
                hidden, cell, _step_inputs(tokens))
            tokens = _greedy_tokens(step_outputs)
            steps.append(tokens)

    decoded = cuda.to_cpu(self.xp.stack(steps).T)

    # Cut every sequence at its first EOS.
    outs = []
    for row in decoded:
        eos_at = np.argwhere(row == EOS)
        outs.append(row[:eos_at[0, 0]] if len(eos_at) > 0 else row)
    return outs
def CalculateValLoss(self, xs, ys):
    """Return the raw loss value from ``self.CalcLoss`` in evaluation mode.

    ``CalcLoss`` is expected to return exactly four values; only the first
    (the loss) is used.  The call runs with back-propagation disabled and
    the ``train`` configuration flag set to ``False``.
    """
    with chainer.no_backprop_mode():
        with chainer.using_config('train', False):
            # The three trailing values are unpacked but intentionally unused.
            val_loss, _n_w, _n_c, _n_c_a = self.CalcLoss(xs, ys)
            return val_loss.data
def CalculateValLoss(self, xs, ys):
    """Compute the validation loss and return its underlying array.

    Calls ``self.CalcLoss(xs, ys)`` (which must yield four values) while
    back-propagation is disabled and ``train`` is configured to ``False``.
    Only the first returned value, the loss, is used.
    """
    eval_mode = chainer.using_config('train', False)
    with chainer.no_backprop_mode(), eval_mode:
        outputs = self.CalcLoss(xs, ys)
        # Keep the strict four-way unpacking so a differently shaped
        # return value still fails loudly.
        val_loss, _n_w, _n_c, _n_c_a = outputs
    return val_loss.data
def generate_text(model, seed, length=512, top_n=10):
    """Generate ``length`` characters from a trained model, starting at ``seed``.

    The seed is encoded with ``encode_text``; all but its last id are fed
    through ``model.predictor`` to set up its internal state.  Starting
    from the last seed id, the predictor is then called ``length`` times:
    the soft-maxed output is handed to ``sample_from_probs`` together with
    ``top_n`` and the sampled id is mapped to text through ``ID2CHAR``.

    Args:
        model: Object exposing a ``predictor`` with ``reset_state()``.
        seed: Seed text; it must encode to at least one id.
        length: Number of characters to generate.
        top_n: Forwarded to ``sample_from_probs``.

    Returns:
        The seed followed by the generated characters.
    """
    logger.info("generating %s characters from top %s choices.", length, top_n)
    logger.info('generating with seed: "%s".', seed)

    seed_ids = encode_text(seed).astype(np.int32)
    model.predictor.reset_state()
    pieces = [seed]

    with chainer.using_config("train", False), chainer.no_backprop_mode():
        # Feed every seed id except the last one; the outputs are discarded,
        # the calls only advance the predictor's internal state.
        for char_id in seed_ids[:-1]:
            model.predictor(Variable(np.array([char_id])))

        current = seed_ids[-1]
        for _ in range(length):
            step_input = Variable(np.array([current], dtype=np.int32))
            probs = F.softmax(model.predictor(step_input))
            current = sample_from_probs(probs.data.squeeze(), top_n)
            pieces.append(ID2CHAR[current])

    generated = "".join(pieces)
    logger.info("generated text: \n%s\n", generated)
    return generated
def feature(self, x):
    """Return the raw output array of ``self.fe(x)``.

    The call runs with back-propagation disabled and the ``.data`` payload
    of the result is returned.
    """
    with chainer.no_backprop_mode():
        extracted = self.fe(x)
        return extracted.data
def predict(self, f):
    """Map features to the output of ``fc2`` with back-propagation disabled.

    Computes ``fc2(relu(fc1(f)))`` and returns its ``.data`` payload.

    Args:
        f: Input accepted by ``self.fc1`` (presumably the array returned by
            ``feature`` -- confirm against the caller).

    Returns:
        The raw output array of ``self.fc2``.
    """
    with chainer.no_backprop_mode():
        # NOTE(review): the original first computed ``h = F.relu(f)`` and
        # immediately overwrote it, so that value never reached the output.
        # The dead computation is removed here; the result is unchanged.
        # If the intent was to rectify ``f`` before ``fc1``, that would be a
        # behaviour change and must be decided together with retraining.
        h = F.relu(self.fc1(f))
        y = self.fc2(h)
        return y.data
def get_greedy_action(Q, obs, show_f=False):
    """Return the index of the highest-valued action for one observation.

    The observation is converted to ``float32`` with ``Q.xp`` and given a
    leading axis of size one.  ``Q.feature`` and ``Q.predict`` are then
    evaluated with back-propagation disabled, and the arg-max of the first
    row of the prediction is returned.

    Args:
        Q: Network exposing ``xp``, ``feature`` and ``predict``.
        obs: A single observation.
        show_f: When true, the extracted features are passed to
            ``show_feature``.

    Returns:
        The selected action index as a Python ``int``.
    """
    backend = Q.xp
    batched_obs = backend.asarray(obs, dtype=np.float32)
    batched_obs = backend.expand_dims(batched_obs, 0)
    with chainer.no_backprop_mode():
        features = Q.feature(batched_obs)
        action_values = Q.predict(features)[0]
        if show_f:
            show_feature(features)
    best_action = backend.argmax(action_values)
    return int(best_action)
def update(Q, target_Q, opt, samples, gamma=0.99, target_type='double_dqn'):
    """Run one Q-learning update of ``Q`` on a minibatch of transitions.

    Args:
        Q: Network being trained; it is called on state batches and must
            expose ``xp`` and ``cleargrads()``.
        target_Q: Network used to evaluate the next states.
        opt: Optimizer; ``opt.update()`` is called after the backward pass.
        samples: Indexable collection of transitions laid out as
            ``(state, action, reward, done, next_state)``.  The first
            ``minibatch_size`` entries are read for the state batches.
        gamma: Discount applied to the next-state value.
        target_type: ``'dqn'`` (max over ``target_Q``) or ``'double_dqn'``
            (action chosen by ``Q``, evaluated by ``target_Q``).

    Raises:
        ValueError: If ``target_type`` is neither of the supported values.
    """
    xp = Q.xp
    batch_shape = (minibatch_size, STATE_LENGTH, FRAME_WIDTH, FRAME_HEIGHT)
    s = np.empty(batch_shape, dtype=np.float32)
    s_next = np.empty(batch_shape, dtype=np.float32)
    a = np.asarray([sample[1] for sample in samples], dtype=np.int32)
    r = np.asarray([sample[2] for sample in samples], dtype=np.float32)
    done = np.asarray([sample[3] for sample in samples], dtype=np.float32)
    # ``range`` rather than ``xrange``: the latter only exists on Python 2
    # and raises NameError on Python 3.
    for i in range(minibatch_size):
        s[i] = samples[i][0]
        s_next[i] = samples[i][4]

    # Move everything to the array module used by Q (GPU if available).
    s = xp.asarray(s)
    a = xp.asarray(a)
    r = xp.asarray(r)
    done = xp.asarray(done)
    s_next = xp.asarray(s_next)

    # Prediction: Q(s, a)
    y = F.select_item(Q(s), a)

    # Target: r + gamma * max_b Q(s', b); built without a backward graph so
    # that gradients only flow through the prediction.
    with chainer.no_backprop_mode():
        if target_type == 'dqn':
            t = r + gamma * (1 - done) * F.max(target_Q(s_next), axis=1)
        elif target_type == 'double_dqn':
            t = r + gamma * (1 - done) * F.select_item(
                target_Q(s_next), F.argmax(Q(s_next), axis=1))
        else:
            raise ValueError('Unsupported target_type: {}'.format(target_type))

    loss = mean_clipped_loss(y, t)
    Q.cleargrads()
    loss.backward()
    opt.update()
def get_greedy_action(Q, obs):
    """Return the index of the highest-valued action for one observation.

    The observation is converted to ``float32`` with ``Q.xp``, given a
    leading axis of size one and passed through ``Q`` with back-propagation
    disabled.  The arg-max of the first output row is returned as ``int``.
    """
    backend = Q.xp
    batched_obs = backend.asarray(obs, dtype=np.float32)
    batched_obs = backend.expand_dims(batched_obs, 0)
    with chainer.no_backprop_mode():
        action_values = Q(batched_obs).data[0]
    best_action = backend.argmax(action_values)
    return int(best_action)
def main():
    """Generate a tiled image from ``net.Generator1`` and save it as JPEG.

    The latent batch is either a 10-step linear interpolation between two
    stored vectors (when both vector files and non-negative indices are
    given) or 100 vectors drawn from a normal distribution with a fixed
    seed.  In both cases the rows are scaled to (approximately) unit norm.
    Randomly drawn vectors are additionally saved to ``<output>.npy``.
    """
    args = parse_args()
    gen1 = net.Generator1()
    chainer.serializers.load_npz(args.model_path, gen1)

    device_id = args.gpu if args.gpu >= 0 else None
    if device_id is not None:
        cuda.get_device(device_id).use()
        gen1.to_gpu(device_id)

    def _unit_rows(vectors):
        # Scale each row to unit L2 norm; the epsilon guards against 0/0.
        return vectors / (np.linalg.norm(vectors, axis=1, keepdims=True) + 1e-12)

    out_vector_path = None
    np.random.seed(1)
    interpolate = (args.vector_file1 and args.vector_index1 >= 0
                   and args.vector_file2 and args.vector_index2 >= 0)
    if interpolate:
        with open(args.vector_file1, 'rb') as f:
            z1 = np.load(f)[args.vector_index1]
        with open(args.vector_file2, 'rb') as f:
            z2 = np.load(f)[args.vector_index2]
        # Ten evenly spaced blend weights from 0 to 1, as a column vector.
        blend = np.arange(10).astype(np.float32).reshape((-1, 1)) / 9
        z = _unit_rows((1 - blend) * z1 + blend * z2)
    else:
        z = np.random.normal(0, 1, (100, latent_size)).astype(np.float32)
        out_vector_path = '{}.npy'.format(args.output)
        z = _unit_rows(z)

    with chainer.no_backprop_mode():
        gen_input = z if device_id is None else cuda.to_gpu(z, device_id)
        generated = gen1(gen_input, train=False)
    images = cuda.to_cpu(generated.data)

    # Arrange the batch in rows of ten images, channels last.
    _, ch, height, width = images.shape
    tiled = images.reshape((-1, 10, ch, height, width))
    tiled = tiled.transpose((0, 3, 1, 4, 2))
    tiled = tiled.reshape((-1, 10 * width, ch))
    # Map [-1, 1] to [0, 255].
    pixels = ((tiled + 1) * 127.5).clip(0, 255).astype(np.uint8)
    Image.fromarray(pixels).save('{}.jpg'.format(args.output))

    if out_vector_path:
        with open(out_vector_path, 'wb') as f:
            np.save(f, z)
def __call__(self, x):
    """Apply the convolution, initialising parameters from data on first use.

    While ``self.g`` holds no data, the call runs an initialisation pass:
    ``self.V`` is initialised if necessary, the convolution is evaluated
    with a gain of one and no bias, ``self._initialize_params`` derives a
    mean and a standard deviation from that output, and the normalised
    output is returned.  Otherwise the regular convolution with ``self.g``
    and ``self.b`` is returned.
    """
    if self.g.data is not None:
        return convolution_2d(x, self.V, self.g, self.b, self.stride, self.pad)

    if self.V.data is None:
        kh, kw = _pair(self.ksize)
        self.V.initialize((self.out_channels, x.shape[1], kh, kw))

    xp = cuda.get_array_module(x)
    with chainer.no_backprop_mode():
        # Compute the output with g = 1 and without bias.
        unit_gain = Variable(
            xp.full((self.out_channels, 1, 1, 1), 1.0).astype(x.dtype))
        t = convolution_2d(x, self.V, unit_gain, None, self.stride, self.pad)
        # presumably this also fills in self.g / self.b -- TODO confirm
        mean_t, std_t = self._initialize_params(t.data)
        return (t - mean_t) / std_t
def translate(self, x_block, max_length=50):
    """Greedily decode ``x_block``, stopping once every row has emitted 0.

    The source is converted with ``source_pad_concat_convert`` and the
    target starts as a single column of zeros.  At each step the model is
    called on the whole target prefix, the arg-max token is appended, and
    decoding stops early when every row has produced token 0 at least once.

    Args:
        x_block: Source batch accepted by ``source_pad_concat_convert``.
        max_length: Upper bound on the number of decoding steps.

    Returns:
        A list with one array per row, truncated before its first 0.  A
        row that ends up empty is replaced by ``np.array([1], 'i')``.
    """
    # TODO: efficient inference by re-using convolution result
    with chainer.no_backprop_mode(), chainer.using_config('train', False):
        x_block = source_pad_concat_convert(x_block, device=None)
        n_rows, _ = x_block.shape
        y_block = self.xp.zeros((n_rows, 1), dtype=x_block.dtype)
        finished = self.xp.zeros((n_rows, ), dtype=x_block.dtype)
        steps = []
        for _step in range(max_length):
            log_prob_tail = self(x_block, y_block, y_block,
                                 get_prediction=True)
            tokens = self.xp.argmax(log_prob_tail.data, axis=1).astype('i')
            steps.append(tokens)
            # Extend the target prefix with the newly predicted column.
            y_block = F.concat([y_block, tokens[:, None]], axis=1).data
            # Count, per row, how many times token 0 has been produced.
            finished += (tokens == 0)
            if self.xp.all(finished):
                break

    decoded = cuda.to_cpu(self.xp.stack(steps).T)

    # Cut every sequence at its first 0 and never return an empty one.
    outs = []
    for row in decoded:
        zero_at = np.argwhere(row == 0)
        if len(zero_at) > 0:
            row = row[:zero_at[0, 0]]
        if len(row) == 0:
            row = np.array([1], 'i')
        outs.append(row)
    return outs
def translate(self, xs, max_length=100):
    """Run only the encoder part of translation; always returns ``None``.

    The source sequences are reversed, embedded with ``self.embed_x`` and
    passed to ``self.mn_encoder``, whose return value is ignored.

    Args:
        xs: Batch of source sequences; every element must support ``[::-1]``.
        max_length: Unused here; kept so the signature matches the other
            ``translate`` implementations.
    """
    with chainer.no_backprop_mode(), chainer.using_config('train', False):
        reversed_xs = [x[::-1] for x in xs]
        embedded = sequence_embed(self.embed_x, reversed_xs)
        # Encode the input sequence; per the original comments the hidden
        # states are sent on to the decoder process by this call.
        self.mn_encoder(embedded)

    # The encoder side yields no translation; evaluation is done by the
    # decoder process.
    return None
def translate(self, xs, max_length=100):
    """Greedily decode on the decoder side of a split encoder/decoder model.

    The first step goes through ``self.mn_decoder``, which supplies the
    initial hidden and cell states (received from the encoder process
    according to the original comments).  The remaining ``max_length - 1``
    steps call ``self.mn_decoder.actual_rnn`` directly with those states.
    Decoding starts from a batch of zeros.

    Args:
        xs: Source batch; only ``len(xs)`` is used, as the batch size.
        max_length: Total number of decoding steps.

    Returns:
        A list with one array per decoded row, truncated just before the
        first 0 it contains (left untouched when it has none).
    """
    n_seqs = len(xs)

    def _step_inputs(tokens):
        # Embed the current tokens and split them into per-sequence inputs.
        return chainer.functions.split_axis(
            self.embed_y(tokens), n_seqs, 0, force_tuple=True)

    def _greedy_tokens(step_outputs):
        # Project the decoder outputs and keep the best-scoring token ids.
        scores = self.W(chainer.functions.concat(step_outputs, axis=0))
        return self.xp.argmax(scores.data, axis=1).astype('i')

    with chainer.no_backprop_mode(), chainer.using_config('train', False):
        steps = []
        start_tokens = self.xp.zeros(n_seqs, 'i')

        # First step: obtain the initial states through the wrapped link.
        hidden, cell, step_outputs, _ = self.mn_decoder(
            _step_inputs(start_tokens))
        tokens = _greedy_tokens(step_outputs)
        steps.append(tokens)

        # Remaining steps: feed back the previously predicted token.  The
        # plain RNN link is reached through ``actual_rnn``.
        for _step in range(1, max_length):
            hidden, cell, step_outputs = self.mn_decoder.actual_rnn(
                hidden, cell, _step_inputs(tokens))
            tokens = _greedy_tokens(step_outputs)
            steps.append(tokens)

    decoded = cuda.to_cpu(self.xp.stack(steps).T)

    # Cut every sequence at its first 0.
    outs = []
    for row in decoded:
        zero_at = numpy.argwhere(row == 0)
        outs.append(row[:zero_at[0, 0]] if len(zero_at) > 0 else row)
    return outs
def evaluate(self):
    """Translate the test data in batches and report a corpus BLEU score.

    Both rank 0 and rank 1 call ``self.model.translate`` on every batch,
    but only rank 1 collects the hypotheses, computes BLEU, reports it
    under the key ``'bleu'`` and prints the elapsed time.

    Returns:
        The observation dict filled in through the reporter scope.
    """
    started = time.time()
    rank = self.comm.rank
    references = []
    hypotheses = []
    observation = {}

    with chainer.no_backprop_mode(), reporter.report_scope(observation):
        for start in range(0, len(self.test_data), self.batch):
            pairs = self.test_data[start:start + self.batch]
            src, trg = zip(*pairs)
            # corpus_bleu expects a list of reference lists per hypothesis.
            references.extend([[t.tolist()] for t in trg])
            src = [chainer.dataset.to_device(self.device, x) for x in src]

            if rank == 0:
                # The result is discarded on this rank.
                self.model.translate(src, self.max_length)
            elif rank == 1:
                translated = self.model.translate(src, self.max_length)
                hypotheses.extend([y.tolist() for y in translated])

        if rank == 1:
            smoothing = bleu_score.SmoothingFunction().method1
            bleu = bleu_score.corpus_bleu(
                references, hypotheses, smoothing_function=smoothing)
            reporter.report({'bleu': bleu}, self.model)

    finished = time.time()
    if rank == 1:
        print("BleuEvaluator(single)::evaluate(): "
              "took {:.3f} [s]".format(finished - started))
        sys.stdout.flush()
    return observation
def translate(self, xs, max_length=100):
    """Greedily decode a batch, using token 0 as both start and end marker.

    Sources are reversed, embedded and encoded with ``None`` as the initial
    hidden and cell states.  The decoder is then stepped ``max_length``
    times, each step consuming the arg-max tokens of the previous one,
    starting from a batch of zeros.

    Args:
        xs: Batch of source sequences; ``len(xs)`` is used as the batch
            size and every element must support ``[::-1]``.
        max_length: Number of decoding steps to run.

    Returns:
        A list with one array per decoded row, truncated just before the
        first 0 it contains (left untouched when it has none).
    """
    n_seqs = len(xs)
    with chainer.no_backprop_mode(), chainer.using_config('train', False):
        reversed_xs = [x[::-1] for x in xs]
        embedded_src = sequence_embed(self.embed_x, reversed_xs)
        # Passing None lets the encoder choose its own initial states.
        hidden, cell, _ = self.encoder(None, None, embedded_src)

        tokens = self.xp.zeros(n_seqs, 'i')
        steps = []
        for _step in range(max_length):
            step_inputs = chainer.functions.split_axis(
                self.embed_y(tokens), n_seqs, 0, force_tuple=True)
            hidden, cell, step_outputs = self.decoder(
                hidden, cell, step_inputs)
            merged = chainer.functions.concat(step_outputs, axis=0)
            tokens = self.xp.argmax(self.W(merged).data, axis=1).astype('i')
            steps.append(tokens)

    decoded = cuda.to_cpu(self.xp.stack(steps).T)

    # Cut every sequence at its first 0.
    outs = []
    for row in decoded:
        zero_at = numpy.argwhere(row == 0)
        if len(zero_at) > 0:
            row = row[:zero_at[0, 0]]
        outs.append(row)
    return outs
def translate_with_beam_search(self, sentence: np.ndarray, max_length: int = 30, beam_width=3) -> List[int]:
    """Translate one sentence with beam search.

    Hypotheses are scored by accumulated negative log-probability (lower
    is better).  ``heaps[i]`` collects the hypotheses that contain ``i``
    generated words; before expansion each level is cut down to the
    ``beam_width`` best entries.  A hypothesis is complete when ``EOS`` is
    proposed as its next word.

    Args:
        sentence: Source word ids; the sequence is reversed before encoding.
        max_length: Maximum number of expansion steps.
        beam_width: Number of hypotheses kept per level.

    Returns:
        The word ids of the best completed hypothesis, without the leading
        and trailing ``EOS``, as plain Python ints.  An empty list is
        returned when no hypothesis was completed within ``max_length``
        steps.
    """
    with chainer.no_backprop_mode(), chainer.using_config('train', False):
        sentence = sentence[::-1]
        embedded_xs = self._embed_input(sentence)
        hidden_states, cell_states, attentions = self._encoder(None, None, [embedded_xs])

        # Entries are (score, translation, hidden_states, cell_states).
        heaps = [[] for _ in range(max_length + 1)]
        heaps[0].append((0, [EOS], hidden_states, cell_states))

        solution = []
        solution_score = 1e8

        for i in range(max_length):
            heaps[i] = sorted(heaps[i], key=lambda t: t[0])[:beam_width]
            for score, translation, i_hidden_states, i_cell_states in heaps[i]:
                wid = translation[-1]
                output, new_hidden_states, new_cell_states = \
                    self._translate_one_word(wid, i_hidden_states, i_cell_states, attentions)
                probs = output.data

                # Visit candidates from most to least probable so both
                # cut-offs below can stop the scan early.
                for next_wid in np.argsort(probs)[::-1]:
                    if probs[next_wid] < 1e-6:
                        break
                    next_score = score - np.log(probs[next_wid])
                    if next_score > solution_score:
                        # Already worse than the best finished translation.
                        break
                    if next_wid == EOS:
                        if next_score < solution_score:
                            solution = translation[1:]  # [1:] drops first EOS
                            solution_score = next_score
                    else:
                        # np.argsort yields numpy integers; store plain ints
                        # so the result matches the declared List[int].
                        next_translation = translation + [int(next_wid)]
                        heaps[i + 1].append(
                            (next_score, next_translation, new_hidden_states, new_cell_states))
    return solution