def __call__(self, X, return_last=False):
    batchsize = X.shape[0]
    seq_length = X.shape[1]
    embedding = self.embed(X)
    embedding = F.swapaxes(embedding, 1, 2)
    out_data = self._forward_layer(0, embedding)
    in_data = [out_data]
    for layer_index in range(1, self.num_layers):
        # Dense connectivity: each layer receives the concatenation of
        # all earlier layer outputs along the channel axis.
        out_data = self._forward_layer(
            layer_index,
            F.concat(in_data) if self.densely_connected else in_data[-1])
        in_data.append(out_data)
    out_data = F.concat(in_data) if self.densely_connected else out_data
    if return_last:
        out_data = out_data[:, :, -1, None]
    if self.using_dropout:
        out_data = F.dropout(out_data, ratio=self.dropout)
    out_data = self.fc(out_data)
    out_data = F.reshape(F.swapaxes(out_data, 1, 2), (-1, self.vocab_size))
    return out_data
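
For context, F.concat joins a sequence of arrays or Variables along a given axis (axis=1, the channel axis, by default). A minimal sketch of the dense-connectivity pattern above, using plain NumPy arrays with made-up shapes:

import numpy as np
import chainer.functions as F

# Outputs of two earlier layers: (batch, channels, time)
h1 = np.zeros((2, 4, 16), dtype=np.float32)
h2 = np.zeros((2, 8, 16), dtype=np.float32)

# Dense connectivity: the next layer sees all earlier feature maps
dense_in = F.concat([h1, h2], axis=1)
assert dense_in.shape == (2, 12, 16)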
Python concat() example source code
def Q_func(self, state):
    # Split the state vector into agent features and market features.
    if state.ndim == 2:
        agent_state = state[:, -self.agent_state_dim:]
        market_state = state[:, :self.market_state_dim]
    elif state.ndim == 3:
        agent_state = state[:, :, -self.agent_state_dim:]
        market_state = state[:, :, :self.market_state_dim]
    a_state = Variable(agent_state)
    m_state = Variable(market_state)
    a = F.tanh(self.a1(a_state))
    a = F.tanh(self.a2(a))
    a = F.tanh(self.a3(a))
    m = F.tanh(self.s1(m_state))
    m = F.tanh(self.s2(m))
    m = F.tanh(self.s3(m))
    # Merge the two towers and estimate Q-values.
    new_state = F.concat((a, m), axis=1)
    h = F.tanh(self.fc4(new_state))
    h = F.tanh(self.fc5(h))
    Q = self.q_value(h)
    return Q
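
The split-then-merge pattern above can be checked in isolation; a toy sketch with hypothetical dimensions (agent_state_dim = 3, market_state_dim = 5, so an 8-dimensional state):

import numpy as np
import chainer.functions as F

state = np.random.randn(4, 8).astype(np.float32)  # (batch, state_dim)
agent_state = state[:, -3:]   # trailing features describe the agent
market_state = state[:, :5]   # leading features describe the market

merged = F.concat((agent_state, market_state), axis=1)
assert merged.shape == (4, 8)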
def __call__(self, x, z, ze, mask, conv_mask):
    att_scale = self.xp.sum(
        mask, axis=2, keepdims=True)[:, None, :, :] ** 0.5
    pad = self.xp.zeros(
        (x.shape[0], x.shape[1], self.width - 1, 1), dtype=x.dtype)
    base_x = x
    z = F.squeeze(z, axis=3)
    # Note: the handling of input, output, and attention here follows
    # the authors' code, which differs slightly from what the paper says.
    for conv_name, preatt_name in zip(self.conv_names, self.preatt_names):
        # Calculate the output of the GLU
        out = getattr(self, conv_name)(
            F.concat([pad, x], axis=2), conv_mask)
        # Calculate the attention from the output of the GLU
        preatt = seq_linear(getattr(self, preatt_name), out)
        query = base_x + preatt
        query = F.squeeze(query, axis=3)
        c = self.attend(query, z, ze, mask) * att_scale
        # Merge them in the residual calculation and scale
        x = (x + (c + out) * scale05) * scale05
    return x
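
scale05 is not defined in the snippet; in ConvS2S-style code it is usually the constant sqrt(0.5), chosen so that summing two signals of equal variance keeps the variance roughly constant. A quick NumPy check of that design choice (the constant's value here is an assumption based on that convention):

import numpy as np

scale05 = 0.5 ** 0.5  # assumed definition: sqrt(0.5)

a = np.random.randn(100000)
b = np.random.randn(100000)
s = (a + b) * scale05
print(np.var(a), np.var(s))  # both are close to 1.0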
convolutional_pose_machine.py (project: convolutional-pose-machines-chainer, author: tomoyukun)
def __call__(self, pmap, fmap, cmap):
    fmap = F.relu(self.conv0(fmap))
    # Downsample the center map to match the feature-map resolution.
    cmap = F.average_pooling_2d(cmap, ksize=8, stride=8)
    # Stack image features, the previous belief map, and the center map
    # along the channel axis.
    h = F.concat((fmap, pmap, cmap), 1)
    h = F.relu(self.conv1(h))
    h = F.relu(self.conv2(h))
    h = F.relu(self.conv3(h))
    h = F.relu(self.conv4(h))
    h = self.conv5(h)
    return h
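
The average pooling with ksize=8, stride=8 exists purely to bring the full-resolution center map down to the feature-map resolution so the three tensors can be concatenated channel-wise. A shape check, with sizes assumed from the usual CPM setup (368x368 input, 46x46 feature maps):

import numpy as np
import chainer.functions as F

fmap = np.zeros((1, 32, 46, 46), dtype=np.float32)   # CNN features
pmap = np.zeros((1, 15, 46, 46), dtype=np.float32)   # belief maps
cmap = np.zeros((1, 1, 368, 368), dtype=np.float32)  # center map

cmap_small = F.average_pooling_2d(cmap, ksize=8, stride=8)  # -> 46x46
h = F.concat((fmap, pmap, cmap_small), 1)
assert h.shape == (1, 48, 46, 46)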
def __call__(self, xs):
    if self.freeze:
        self.embed.disable_update()
    xs = self.embed(xs)
    batchsize, height, width = xs.shape
    # Add a channel dimension for the 2-D convolutions.
    xs = F.reshape(xs, (batchsize, 1, height, width))
    conv3_xs = self.conv3(xs)
    conv4_xs = self.conv4(xs)
    conv5_xs = self.conv5(xs)
    # Max-pool each feature map over the whole time axis.
    h1 = F.max_pooling_2d(F.relu(conv3_xs), conv3_xs.shape[2])
    h2 = F.max_pooling_2d(F.relu(conv4_xs), conv4_xs.shape[2])
    h3 = F.max_pooling_2d(F.relu(conv5_xs), conv5_xs.shape[2])
    # Concatenate the pooled feature maps along the channel axis.
    concat_layer = F.concat([h1, h2, h3], axis=1)
    # NB: 'train' is forced to True here, so dropout stays active
    # even at test time.
    with chainer.using_config('train', True):
        y = self.l1(F.dropout(F.tanh(concat_layer)))
    return y
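
Passing conv3_xs.shape[2] as the pooling size makes the max pooling global over the time axis, collapsing each feature map to a single value per filter before the concat. A standalone check with assumed sizes:

import numpy as np
import chainer.functions as F

# Convolution output for one filter size: (batch, filters, time, 1)
conv_out = np.random.randn(8, 100, 48, 1).astype(np.float32)
pooled = F.max_pooling_2d(F.relu(conv_out), conv_out.shape[2])
assert pooled.shape == (8, 100, 1, 1)  # one max per filter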
def forward(self, data):
    ep_list = [self.p_embed(d[0], d[1]) for d in data]
    ec_list = [self.c_embed(d[0], d[1]) for d in data]
    er_list = [self.r_embed(d[0], d[1]) for d in data]
    p_list = self.p_encode(ep_list)
    c_list = self.c_encode(ec_list)
    r_list = self.r_encode(er_list)
    # Stack the per-token encodings into (1, n, hidden) matrices.
    P = functions.reshape(
        functions.concat(p_list, 0),
        (1, len(data), self.hidden_size))
    C = functions.reshape(
        functions.concat(c_list, 0),
        (1, len(data), self.hidden_size))
    R = functions.concat(r_list, 0)
    # Score every (child, parent) pair with a dot product.
    parent_scores = functions.reshape(
        functions.batch_matmul(C, P, transb=True),
        (len(data), len(data)))
    root_scores = functions.reshape(
        self.r_scorer(R),
        (1, len(data)))
    return parent_scores, root_scores
def __call__(self):
    mem_optimize = nmtrain.optimization.chainer_mem_optimize
    # Calculate the attention vector
    a = self.attention(self.S, self.h)
    # Calculate the context vector
    c = F.squeeze(F.batch_matmul(self.S, a, transa=True), axis=2)
    # Combine the hidden vector with the context
    self.ht = self.context_project(F.concat((self.h, c), axis=1))
    # Calculate the word probability distribution
    y = mem_optimize(self.affine_vocab, F.tanh(self.ht), level=1)
    if self.use_lexicon:
        y = self.lexicon_model(y, a, self.ht, self.lexicon_matrix)
    if nmtrain.environment.is_train():
        return nmtrain.models.decoders.Output(y=y)
    else:
        # Also return the attention vector for decoding
        return nmtrain.models.decoders.Output(y=y, a=a)
def convert(batch, device):
    def to_device_batch(batch):
        if device is None:
            return batch
        elif device < 0:
            return [chainer.dataset.to_device(device, x) for x in batch]
        else:
            # Concatenate the variable-length arrays, move them to the
            # GPU in one transfer, then split them back apart.
            xp = cuda.cupy.get_array_module(*batch)
            concat = xp.concatenate(batch, axis=0)
            sections = numpy.cumsum([len(x) for x in batch[:-1]], dtype='i')
            concat_dev = chainer.dataset.to_device(device, concat)
            batch_dev = cuda.cupy.split(concat_dev, sections)
            return batch_dev
    return tuple(
        to_device_batch([x for x, _ in batch]) +
        to_device_batch([y for _, y in batch]))
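
On CPU (device=None) the helper just passes the lists through; a quick usage sketch with a hypothetical toy batch of (source, target) index arrays:

import numpy as np

batch = [(np.array([1, 2, 3], np.int32), np.array([4, 5], np.int32)),
         (np.array([6], np.int32), np.array([7, 8, 9], np.int32))]

result = convert(batch, device=None)
assert len(result) == 4  # two source arrays followed by two target arrays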
def update_core(self):
    batch = self.get_iterator('main').next()
    A = data_process([A for A, B in batch], self.converter, self.device)
    B = data_process([B for A, B in batch], self.converter, self.device)
    # Conditional GAN pairs: the discriminator sees the input and the
    # (real or generated) output concatenated along the channel axis.
    real_AB = F.concat((A, B))
    fake_B = self.G(A, test=False)
    fake_AB = F.concat((A, fake_B))
    real_D = self.D(real_AB, test=False)
    fake_D = self.D(fake_AB, test=False)
    optimizer_G = self.get_optimizer('main')
    optimizer_D = self.get_optimizer('D')
    optimizer_D.update(self.loss_D, real_D, fake_D)
    optimizer_G.update(self.loss_G, B, fake_B, fake_D)
def __call__(self, x, test=False, dropout=True):
    # Encoder: eight stride-2 convolutions.
    e1 = self.c1(x)
    e2 = self.b2(self.c2(F.leaky_relu(e1)), test=test)
    e3 = self.b3(self.c3(F.leaky_relu(e2)), test=test)
    e4 = self.b4(self.c4(F.leaky_relu(e3)), test=test)
    e5 = self.b5(self.c5(F.leaky_relu(e4)), test=test)
    e6 = self.b6(self.c6(F.leaky_relu(e5)), test=test)
    e7 = self.b7(self.c7(F.leaky_relu(e6)), test=test)
    e8 = self.b8(self.c8(F.leaky_relu(e7)), test=test)
    # Decoder: each upconvolution output is concatenated with the
    # mirrored encoder feature map (U-Net skip connections).
    d1 = F.concat((F.dropout(self.b1_d(self.dc1(F.relu(e8)), test=test), train=dropout), e7))
    d2 = F.concat((F.dropout(self.b2_d(self.dc2(F.relu(d1)), test=test), train=dropout), e6))
    d3 = F.concat((F.dropout(self.b3_d(self.dc3(F.relu(d2)), test=test), train=dropout), e5))
    d4 = F.concat((self.b4_d(self.dc4(F.relu(d3)), test=test), e4))
    d5 = F.concat((self.b5_d(self.dc5(F.relu(d4)), test=test), e3))
    d6 = F.concat((self.b6_d(self.dc6(F.relu(d5)), test=test), e2))
    d7 = F.concat((self.b7_d(self.dc7(F.relu(d6)), test=test), e1))
    y = F.tanh(self.dc8(F.relu(d7)))
    return y
def __call__(self, x1, x2):
    xp = self.xp
    out_size = self.out_size
    batch_size, len1, dim1 = x1.shape
    if not self.nobias[0]:
        # Append a constant 1 feature so W can model a bias term.
        x1 = F.concat((x1, xp.ones((batch_size, len1, 1),
                                   dtype=xp.float32)), axis=2)
        dim1 += 1
    len2, dim2 = x2.shape[1:]
    if not self.nobias[1]:
        x2 = F.concat((x2, xp.ones((batch_size, len2, 1),
                                   dtype=xp.float32)), axis=2)
        dim2 += 1
    x1_reshaped = F.reshape(x1, (batch_size * len1, dim1))
    W_reshaped = F.reshape(F.transpose(self.W, (0, 2, 1)),
                           (dim1, out_size * dim2))
    affine = F.reshape(F.matmul(x1_reshaped, W_reshaped),
                       (batch_size, len1 * out_size, dim2))
    biaffine = F.transpose(
        F.reshape(F.batch_matmul(affine, x2, transb=True),
                  (batch_size, len1, out_size, len2)),
        (0, 1, 3, 2))
    if not self.nobias[2]:
        biaffine += F.broadcast_to(self.b, biaffine.shape)
    return biaffine
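
The reshape/matmul sequence above evaluates the biaffine form s[b, i, j, o] = x1[b, i] . W[o] . x2[b, j] without looping over pairs. A NumPy einsum sketch of the same contraction, with toy shapes, to make the intent explicit:

import numpy as np

batch, len1, len2, dim1, dim2, out_size = 2, 5, 7, 4, 4, 3
x1 = np.random.randn(batch, len1, dim1).astype(np.float32)
x2 = np.random.randn(batch, len2, dim2).astype(np.float32)
W = np.random.randn(out_size, dim1, dim2).astype(np.float32)

# s[b, i, j, o] = x1[b, i] @ W[o] @ x2[b, j]
s = np.einsum('bif,ofg,bjg->bijo', x1, W, x2)
assert s.shape == (batch, len1, len2, out_size)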
def __call__(self, xs: List[Variable], ys: List[Variable]) -> Variable:
    batch_size = len(xs)
    xs = [x[::-1] for x in xs]  # feed the source sequences reversed
    eos = np.array([EOS], dtype=np.int32)
    ys_in = [F.concat((eos, y), axis=0) for y in ys]   # decoder inputs
    ys_out = [F.concat((y, eos), axis=0) for y in ys]  # training targets
    embedded_xs = [self._embed_input(x) for x in xs]
    embedded_ys = [self._embed_output(y) for y in ys_in]
    hidden_states, cell_states, attentions = self._encoder(None, None, embedded_xs)
    _, _, embedded_outputs = self._decoder(hidden_states, cell_states, embedded_ys)
    loss = 0
    for embedded_output, y, attention in zip(embedded_outputs, ys_out, attentions):
        if self._use_attention:
            output = self._calculate_attention_layer_output(embedded_output, attention)
        else:
            output = self._extract_output(embedded_output)
        loss += F.softmax_cross_entropy(output, y)
    loss /= batch_size
    return loss
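
The EOS handling is the usual teacher-forcing shift: the decoder input is the target prefixed with EOS, and the training target is the same sequence with EOS appended. A toy check, assuming EOS = 0:

import numpy as np
import chainer.functions as F

EOS = 0  # assumed token id
y = np.array([3, 1, 4], dtype=np.int32)
eos = np.array([EOS], dtype=np.int32)

y_in = F.concat((eos, y), axis=0)   # decoder input:   [0, 3, 1, 4]
y_out = F.concat((y, eos), axis=0)  # training target: [3, 1, 4, 0]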
def __call__(self, x_0: chainer.Variable, x_1: chainer.Variable) -> typing.List[chainer.Variable]:
    hs = []
    h = self.c0_0(x_0)
    if self.will_concat:
        h = F.concat([h, self.c0_1(x_1)])
    h = self.c1(h)
    # Collect multi-scale outputs after each block via global average pooling.
    hs.append(self.out_1(F.average_pooling_2d(h, (h.shape[2], h.shape[3]))))
    h = self.c2(h)
    hs.append(self.out_2(F.average_pooling_2d(h, (h.shape[2], h.shape[3]))))
    h = self.c3(h)
    h = self.c4(h)
    hs.append(h)
    return hs
def __call__(self, X):
    # Generate random values and attach them to the inputs.
    R = np.random.randn(X.data.shape[0], self.rand_sz)
    R = Variable(R.astype("float32"))
    h = F.concat([R, X])
    h = self.ipt(h)
    y = self.out(h)
    # Prior knowledge: the environment observation is a one-hot vector.
    obs = F.softmax(y[:, :-2])
    # Prior knowledge: reward and termination flag are in [0, 1].
    rew = F.sigmoid(y[:, [-2]])
    fin = F.sigmoid(y[:, [-1]])
    y = F.concat([obs, rew, fin])
    return y
def processDecLSTMOneStep(self, decInputEmb, lstm_states_in,
                          finalHS, args, dropout_rate):
    # 1. Restore the decoder LSTM's internal states; this explicit
    #    restore is needed for beam search, where the states differ
    #    per hypothesis.
    self.model.decLSTM.setAllLSTMStates(lstm_states_in)
    # 2. Build the decoder input, with input feeding if enabled.
    if self.flag_dec_ifeed == 0:    # no input feeding
        wembed = decInputEmb
    elif self.flag_dec_ifeed == 1:  # input feeding (default)
        wembed = chaFunc.concat((finalHS, decInputEmb))
    # elif self.flag_dec_ifeed == 2:  # use finalHS only (for debugging)
    #     wembed = finalHS
    else:
        assert 0, "ERROR"
    # 3. Advance the N-layer RNN by one step.
    h1 = self.model.decLSTM.processOneStepForward(
        wembed, args, dropout_rate)
    # 4. Retrieve the updated LSTM states for the next step.
    lstm_states_out = self.model.decLSTM.getAllLSTMStates()
    return h1, lstm_states_out
# attention computation
def __call__(self, x):
    minibatch_size = x.shape[0]
    activation = F.reshape(self.t(x), (-1, self.n_kernels, self.kernel_dim))
    activation_ex = F.expand_dims(activation, 3)
    activation_ex_t = F.expand_dims(F.transpose(activation, (1, 2, 0)), 0)
    activation_ex, activation_ex_t = F.broadcast(activation_ex, activation_ex_t)
    diff = activation_ex - activation_ex_t
    xp = chainer.cuda.get_array_module(x.data)
    eps = F.expand_dims(xp.eye(minibatch_size, dtype=xp.float32), 1)
    eps = F.broadcast_to(eps, (minibatch_size, self.n_kernels, minibatch_size))
    # L1 distance between the kernel features of every pair of samples.
    sum_diff = F.sum(abs(diff), axis=2)
    sum_diff = F.broadcast_to(sum_diff, eps.shape)
    abs_diff = sum_diff + eps
    # Append the closeness features to the original activations.
    minibatch_features = F.sum(F.exp(-abs_diff), 2)
    return F.concat((x, minibatch_features), axis=1)
def __call__(self, x):
    xp = chainer.cuda.get_array_module(x.data)
    batchsize = x.shape[0]
    if not self.train_weights and self.initial_T is not None:
        self.T.W.data = self.initial_T
    M = F.reshape(self.T(x), (-1, self.num_kernels, self.ndim_kernel))
    M = F.expand_dims(M, 3)
    M_T = F.transpose(M, (3, 1, 2, 0))
    M, M_T = F.broadcast(M, M_T)
    # Pairwise L1 distances between samples, per kernel.
    norm = F.sum(abs(M - M_T), axis=2)
    # Mask out each sample's distance to itself with a large penalty.
    eraser = F.broadcast_to(
        xp.eye(batchsize, dtype=x.dtype).reshape((batchsize, 1, batchsize)),
        norm.shape)
    c_b = F.exp(-(norm + 1e6 * eraser))
    o_b = F.sum(c_b, axis=2)
    if not self.train_weights:
        self.initial_T = self.T.W.data
    return F.concat((x, o_b), axis=1)
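
Both of the last two snippets implement minibatch discrimination (Salimans et al., 2016): each sample is given extra features measuring its proximity to the other samples in the batch, which helps a GAN discriminator detect mode collapse. A compact NumPy sketch of the core computation with toy sizes:

import numpy as np

# Kernel features for a batch of 4 samples: (batch, n_kernels, kernel_dim)
M = np.random.randn(4, 5, 3).astype(np.float32)

# Pairwise L1 distances per kernel: (batch, batch, n_kernels)
diff = np.abs(M[:, None] - M[None, :]).sum(axis=-1)
diff += 1e6 * np.eye(4, dtype=np.float32)[:, :, None]  # mask self-distances
feats = np.exp(-diff).sum(axis=1)  # similarity to the rest of the batch
assert feats.shape == (4, 5)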
def __call__(self, x):
    if not hasattr(self, 'encoding') or self.encoding is None:
        self.batch_size = x.shape[0]
        self.init()
    dims = len(x.shape) - 1
    # Split the pre-activation into forget, candidate, and output gates.
    f, z, o = F.split_axis(self.pre(x), 3, axis=dims)
    f = F.sigmoid(f)
    z = (1 - f) * F.tanh(z)
    o = F.sigmoid(o)
    if dims == 2:
        # Whole-sequence input: run the recurrent pooling over time.
        self.c = strnn(f, z, self.c[:self.batch_size])
    else:
        # Single-step input: c_t = f * c_{t-1} + z_t.
        self.c = f * self.c + z
    if self.attention:
        context = attention_sum(self.encoding, self.c)
        self.h = o * self.o(F.concat((self.c, context), axis=dims))
    else:
        self.h = self.c * o
    self.x = x
    return self.h
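
The single-step branch makes the recurrence explicit: c_t = f_t * c_{t-1} + z_t (with z already scaled by 1 - f), which is the QRNN's f-pooling. A plain-NumPy sketch of that scan over a whole sequence, under assumed (batch, time, hidden) shapes:

import numpy as np

def f_pooling(f, z, c0):
    # c_t = f_t * c_{t-1} + z_t, scanned along the time axis
    c, out = c0, []
    for t in range(f.shape[1]):
        c = f[:, t] * c + z[:, t]
        out.append(c)
    return np.stack(out, axis=1)

f = np.random.rand(2, 6, 8).astype(np.float32)   # forget gates in (0, 1)
z = np.random.randn(2, 6, 8).astype(np.float32)  # gated candidates
c = f_pooling(f, z, np.zeros((2, 8), np.float32))
assert c.shape == (2, 6, 8)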