def __init__(self, ngpu, **kwargs):
    """Initialise ``netG_images``: two linear layers and an optional smoothing conv.

    Args:
        ngpu: stored on the instance as ``self.ngpu``; not otherwise used here.
        **kwargs: must contain ``'L1'`` (input features of ``l1``), ``'L2'``
            (output features of ``l2``), ``'K'`` (hidden width) and
            ``'arguments'`` (an object exposing ``smooth_output``).

    Raises:
        KeyError: if any of the required keyword arguments is missing.
    """
    super(netG_images, self).__init__()
    self.ngpu = ngpu

    # Required configuration, read from the keyword arguments.
    self.L1 = kwargs['L1']
    self.L2 = kwargs['L2']
    self.K = kwargs['K']
    self.arguments = kwargs['arguments']

    # Extra hidden units added on top of K; fixed at zero in this block.
    extra_units = 0
    hidden_width = self.K + extra_units

    self.l1 = nn.Linear(self.L1, hidden_width, bias=True)
    initializationhelper(self.l1, 'tanh')

    self.l2 = nn.Linear(hidden_width, self.L2, bias=True)
    initializationhelper(self.l2, 'relu')

    # The 5x5 single-channel convolution is only created when requested.
    self.smooth_output = self.arguments.smooth_output
    if self.smooth_output:
        self.sml = nn.Conv2d(1, 1, 5, padding=2)
        initializationhelper(self.sml, 'relu')
python类tanh()的实例源码
def __init__(self, ngpu, **kwargs):
    """Initialise ``netD_images``: three linear layers and one batch-norm layer.

    Args:
        ngpu: stored on the instance as ``self.ngpu``; not otherwise used here.
        **kwargs: must contain ``'L'`` (input features of ``l1``), ``'K'``
            (hidden width) and ``'arguments'`` (stored as ``self.arguments``).

    Raises:
        KeyError: if any of the required keyword arguments is missing.
    """
    super(netD_images, self).__init__()
    self.ngpu = ngpu

    # Required configuration, read from the keyword arguments.
    self.L = kwargs['L']
    self.K = kwargs['K']
    self.arguments = kwargs['arguments']

    in_features, width = self.L, self.K

    self.l1 = nn.Linear(in_features, width, bias=True)
    initializationhelper(self.l1, 'tanh')
    self.l1_bn = nn.BatchNorm1d(width)

    self.l2 = nn.Linear(width, width, bias=True)
    initializationhelper(self.l2, 'relu')
    # NOTE: a batch-norm after l2 (``l2_bn``) was disabled in the original
    # and is intentionally not registered here.

    # Final layer maps the hidden width to a single output unit.
    self.l3 = nn.Linear(width, 1, bias=True)
    initializationhelper(self.l3, 'relu')
def _transform_decoder_init_state(self, hn):
if isinstance(hn, tuple):
hn, cn = hn
# hn [2 * num_layers, batch, hidden_size]
num_dir, batch, hidden_size = cn.size()
# first convert cn t0 [batch, 2 * num_layers, hidden_size]
cn = cn.transpose(0, 1).contiguous()
# then view to [batch, num_layers, 2 * hidden_size] --> [num_layer, batch, 2 * num_layers]
cn = cn.view(batch, num_dir / 2, 2 * hidden_size).transpose(0, 1)
# take hx_dense to [num_layers, batch, hidden_size]
cn = self.hx_dense(cn)
# hn is tanh(cn)
hn = F.tanh(cn)
hn = (hn, cn)
else:
# hn [2 * num_layers, batch, hidden_size]
num_dir, batch, hidden_size = hn.size()
# first convert hn t0 [batch, 2 * num_layers, hidden_size]
hn = hn.transpose(0, 1).contiguous()
# then view to [batch, num_layers, 2 * hidden_size] --> [num_layer, batch, 2 * num_layers]
hn = hn.view(batch, num_dir / 2, 2 * hidden_size).transpose(0, 1)
# take hx_dense to [num_layers, batch, hidden_size]
hn = F.tanh(self.hx_dense(hn))
return hn
def SkipConnectGRUCell(input, hidden, hidden_skip, w_ih, w_hh, b_ih=None, b_hh=None, noise_in=None, noise_hidden=None):
    """One GRU step whose recurrent input is ``[hidden, hidden_skip]``.

    The three gates (reset, input, new) are computed with batched matrix
    products, so every per-gate operand carries a leading axis of size 3.

    Args:
        input: 2-D input; expanded to three copies, or multiplied by
            ``noise_in`` after ``unsqueeze(0)`` when noise is given.
        hidden: previous hidden state; concatenated with ``hidden_skip``
            along dim 1 and also used in the final interpolation.
        hidden_skip: skip-connected hidden state.
        w_ih, w_hh: 3-D per-gate weights used as the batched right operand.
        b_ih, b_hh: optional per-gate biases (``unsqueeze(1)`` is applied to
            broadcast over the batch axis). ``None`` means no bias.
        noise_in, noise_hidden: optional multiplicative noise with a leading
            gate axis.

    Returns:
        The next hidden state ``newgate + inputgate * (hidden - newgate)``.
    """

    def _gate_preactivations(operand, weight, bias):
        # The signature advertises ``bias=None``; honour it instead of
        # crashing on ``None.unsqueeze``.
        if bias is None:
            return torch.bmm(operand, weight)
        return torch.baddbmm(bias.unsqueeze(1), operand, weight)

    input = input.expand(3, *input.size()) if noise_in is None else input.unsqueeze(0) * noise_in

    hx = torch.cat([hidden, hidden_skip], dim=1)
    hx = hx.expand(3, *hx.size()) if noise_hidden is None else hx.unsqueeze(0) * noise_hidden

    # Unpacking along dim 0 yields one slice per gate.
    i_r, i_i, i_n = _gate_preactivations(input, w_ih, b_ih)
    h_r, h_i, h_n = _gate_preactivations(hx, w_hh, b_hh)

    resetgate = torch.sigmoid(i_r + h_r)
    inputgate = torch.sigmoid(i_i + h_i)
    newgate = torch.tanh(i_n + resetgate * h_n)
    hy = newgate + inputgate * (hidden - newgate)
    return hy
def SkipConnectFastGRUCell(input, hidden, hidden_skip, w_ih, w_hh, b_ih=None, b_hh=None, noise_in=None, noise_hidden=None):
    """One GRU step over ``[hidden, hidden_skip]`` using fused linear projections.

    Args:
        input: step input; multiplied by ``noise_in`` when that is given.
        hidden: previous hidden state.
        hidden_skip: skip-connected hidden state, concatenated to ``hidden``
            along dim 1 before the recurrent projection.
        w_ih, w_hh: weights passed to ``F.linear``; the outputs are split in
            three equal chunks along dim 1 (reset, input, new).
        b_ih, b_hh: optional biases.
        noise_in, noise_hidden: optional multiplicative noise.

    Returns:
        The next hidden state.
    """
    x = input if noise_in is None else input * noise_in

    recurrent = torch.cat([hidden, hidden_skip], dim=1)
    if noise_hidden is not None:
        recurrent = recurrent * noise_hidden

    if x.is_cuda:
        # CUDA inputs go through the fused backend, which receives the
        # un-biased projections and (optionally) the biases separately.
        proj_in = F.linear(x, w_ih)
        proj_rec = F.linear(recurrent, w_hh)
        fused = fusedBackend.GRUFused()
        if b_ih is None:
            return fused(proj_in, proj_rec, hidden)
        return fused(proj_in, proj_rec, hidden, b_ih, b_hh)

    proj_in = F.linear(x, w_ih, b_ih)
    proj_rec = F.linear(recurrent, w_hh, b_hh)
    in_reset, in_update, in_new = torch.chunk(proj_in, 3, 1)
    rec_reset, rec_update, rec_new = torch.chunk(proj_rec, 3, 1)

    reset = F.sigmoid(in_reset + rec_reset)
    update = F.sigmoid(in_update + rec_update)
    candidate = F.tanh(in_new + reset * rec_new)
    return candidate + update * (hidden - candidate)
def VarLSTMCell(input, hidden, w_ih, w_hh, b_ih=None, b_hh=None, noise_in=None, noise_hidden=None):
    """One LSTM step with per-gate batched weights and optional noise.

    The four gates (in, forget, cell, out) are computed with batched matrix
    products, so every per-gate operand carries a leading axis of size 4.

    Args:
        input: 2-D input; expanded to four copies, or multiplied by
            ``noise_in`` after ``unsqueeze(0)`` when noise is given.
        hidden: ``(hx, cx)`` tuple of previous hidden and cell state.
        w_ih, w_hh: 3-D per-gate weights used as the batched right operand.
        b_ih, b_hh: optional per-gate biases (``unsqueeze(1)`` is applied to
            broadcast over the batch axis). ``None`` means no bias.
        noise_in, noise_hidden: optional multiplicative noise with a leading
            gate axis.

    Returns:
        ``(hy, cy)``: the next hidden and cell state.
    """

    def _gate_preactivations(operand, weight, bias):
        # The signature advertises ``bias=None``; honour it instead of
        # crashing on ``None.unsqueeze``.
        if bias is None:
            return torch.bmm(operand, weight)
        return torch.baddbmm(bias.unsqueeze(1), operand, weight)

    input = input.expand(4, *input.size()) if noise_in is None else input.unsqueeze(0) * noise_in

    hx, cx = hidden
    hx = hx.expand(4, *hx.size()) if noise_hidden is None else hx.unsqueeze(0) * noise_hidden

    gates = _gate_preactivations(input, w_ih, b_ih) + _gate_preactivations(hx, w_hh, b_hh)
    # Unpacking along dim 0 yields one slice per gate.
    ingate, forgetgate, cellgate, outgate = gates

    ingate = torch.sigmoid(ingate)
    forgetgate = torch.sigmoid(forgetgate)
    cellgate = torch.tanh(cellgate)
    outgate = torch.sigmoid(outgate)

    cy = (forgetgate * cx) + (ingate * cellgate)
    hy = outgate * torch.tanh(cy)
    return hy, cy
def VarFastGRUCell(input, hidden, w_ih, w_hh, b_ih=None, b_hh=None, noise_in=None, noise_hidden=None):
    """One GRU step using fused linear projections and optional noise.

    Args:
        input: step input; multiplied by ``noise_in`` when that is given.
        hidden: previous hidden state; multiplied by ``noise_hidden`` for the
            recurrent projection only (the final interpolation uses the
            un-noised ``hidden``).
        w_ih, w_hh: weights passed to ``F.linear``; the outputs are split in
            three equal chunks along dim 1 (reset, input, new).
        b_ih, b_hh: optional biases.
        noise_in, noise_hidden: optional multiplicative noise.

    Returns:
        The next hidden state.
    """
    x = input if noise_in is None else input * noise_in
    recurrent = hidden if noise_hidden is None else hidden * noise_hidden

    if x.is_cuda:
        # CUDA inputs go through the fused backend, which receives the
        # un-biased projections and (optionally) the biases separately.
        proj_in = F.linear(x, w_ih)
        proj_rec = F.linear(recurrent, w_hh)
        fused = fusedBackend.GRUFused()
        if b_ih is None:
            return fused(proj_in, proj_rec, hidden)
        return fused(proj_in, proj_rec, hidden, b_ih, b_hh)

    proj_in = F.linear(x, w_ih, b_ih)
    proj_rec = F.linear(recurrent, w_hh, b_hh)
    in_reset, in_update, in_new = torch.chunk(proj_in, 3, 1)
    rec_reset, rec_update, rec_new = torch.chunk(proj_rec, 3, 1)

    reset = F.sigmoid(in_reset + rec_reset)
    update = F.sigmoid(in_update + rec_update)
    candidate = F.tanh(in_new + reset * rec_new)
    return candidate + update * (hidden - candidate)
def forward(self, output, context):
    """Dot-product attention of decoder outputs over encoder context.

    Args:
        output: decoder outputs, ``(batch, out_len, dim)``.
        context: encoder outputs, ``(batch, in_len, dim)``.

    Returns:
        ``(output, attn)``: the attended outputs ``(batch, out_len, dim)``
        and the attention weights ``(batch, out_len, in_len)``.

    ``self.mask``, when not ``None``, marks score positions that are set to
    ``-inf`` before the softmax and therefore receive zero weight.
    """
    batch_size = output.size(0)
    hidden_size = output.size(2)
    input_size = context.size(1)

    # (batch, out_len, dim) * (batch, in_len, dim) -> (batch, out_len, in_len)
    attn = torch.bmm(output, context.transpose(1, 2))
    if self.mask is not None:
        # Out-of-place and autograd-tracked, instead of mutating ``.data``.
        attn = attn.masked_fill(self.mask, -float('inf'))

    # The view is 2-D, so normalising over dim 1 is exactly what the
    # deprecated implicit-dim softmax did.
    attn = F.softmax(attn.view(-1, input_size), dim=1).view(batch_size, -1, input_size)

    # (batch, out_len, in_len) * (batch, in_len, dim) -> (batch, out_len, dim)
    mix = torch.bmm(attn, context)

    # concat -> (batch, out_len, 2*dim)
    combined = torch.cat((mix, output), dim=2)

    # output -> (batch, out_len, dim)
    output = torch.tanh(self.linear_out(combined.view(-1, 2 * hidden_size))).view(batch_size, -1, hidden_size)
    return output, attn
def forward(self, xt, state):
    """Run one LSTM step whose cell candidate is a max over two projections.

    Args:
        xt: input for this step, fed to ``self.i2h``.
        state: ``(h, c)`` pair; only the last entry along dim 0 of each is
            used as the previous hidden / cell state.

    Returns:
        ``(output, state)`` where ``output`` is the new hidden state after
        ``self.dropout`` and ``state`` is ``(h, c)`` with a leading axis of
        size 1 re-added.
    """
    size = self.rnn_size
    prev_h = state[0][-1]
    prev_c = state[1][-1]

    # Columns [0, 3*size) hold the three sigmoid gates; the remaining two
    # blocks of ``size`` columns are the competing candidate projections.
    sums = self.i2h(xt) + self.h2h(prev_h)
    gates = F.sigmoid(sums.narrow(1, 0, 3 * size))
    in_gate, forget_gate, out_gate = [gates.narrow(1, k * size, size) for k in range(3)]

    candidate = torch.max(sums.narrow(1, 3 * size, size),
                          sums.narrow(1, 4 * size, size))

    next_c = forget_gate * prev_c + in_gate * candidate
    next_h = self.dropout(out_gate * F.tanh(next_c))

    return next_h, (next_h.unsqueeze(0), next_c.unsqueeze(0))
def forward(self, h, att_feats, p_att_feats):
    """Additive attention over a set of attention features.

    Args:
        h: query state fed to ``self.h2att``.
        att_feats: raw attention features; viewed as
            ``(batch, att_size, rnn_size)``.
        p_att_feats: the already-projected attention features; viewed as
            ``(batch, att_size, att_hid_size)``.

    Returns:
        The attention-weighted sum of ``att_feats``, ``(batch, rnn_size)``.
    """
    # Number of attention locations per batch element.
    att_size = att_feats.numel() // att_feats.size(0) // self.rnn_size
    att = p_att_feats.view(-1, att_size, self.att_hid_size)

    att_h = self.h2att(h)                               # batch * att_hid_size
    att_h = att_h.unsqueeze(1).expand_as(att)           # batch * att_size * att_hid_size
    dot = att + att_h                                   # batch * att_size * att_hid_size
    dot = torch.tanh(dot)                               # batch * att_size * att_hid_size
    dot = dot.view(-1, self.att_hid_size)               # (batch * att_size) * att_hid_size
    dot = self.alpha_net(dot)                           # (batch * att_size) * 1
    dot = dot.view(-1, att_size)                        # batch * att_size

    # ``dot`` is 2-D here, so dim=1 is exactly what the deprecated
    # implicit-dim softmax selected.
    weight = F.softmax(dot, dim=1)                      # batch * att_size

    att_feats_ = att_feats.view(-1, att_size, self.rnn_size)           # batch * att_size * att_feat_size
    att_res = torch.bmm(weight.unsqueeze(1), att_feats_).squeeze(1)    # batch * att_feat_size
    return att_res
def node_forward(self, inputs, child_c, child_h):
    """Compute the cell and hidden state of one tree node from its children.

    Args:
        inputs: node input, fed to ``self.ix``, ``self.ox``, ``self.ux`` and
            ``self.fx``.
        child_c: stacked child cell states (children along dim 0).
        child_h: stacked child hidden states (children along dim 0); dim 1 is
            squeezed before summing.

    Returns:
        ``(c, h)``: the node's cell state and hidden state.
    """
    # ``torch`` is used directly rather than through the incidental
    # ``F.torch`` attribute of ``torch.nn.functional``.
    child_h_sum = torch.sum(torch.squeeze(child_h, 1), 0)

    i = torch.sigmoid(self.ix(inputs) + self.ih(child_h_sum))
    o = torch.sigmoid(self.ox(inputs) + self.oh(child_h_sum))
    u = torch.tanh(self.ux(inputs) + self.uh(child_h_sum))

    # add extra singleton dimension
    fx = torch.unsqueeze(self.fx(inputs), 1)
    # One forget gate per child, stacked along dim 0.
    f = torch.cat([self.fh(child_hi) + fx for child_hi in child_h], 0)
    f = torch.sigmoid(f)
    # NOTE(review): the original comment here said "removing extra singleton
    # dimension", but the call adds one; the squeeze below removes dim 1 again
    # when it has size 1. Confirm the intended shapes against the caller.
    f = torch.unsqueeze(f, 1)
    fc = torch.squeeze(torch.mul(f, child_c), 1)

    c = torch.mul(i, u) + torch.sum(fc, 0)
    h = torch.mul(o, torch.tanh(c))
    return c, h
def __init__(self, n_in, n_out, batchnorm=False, preactivation=True, gate_style='add_split', kernel_size=7):
    """Initialise ``SMASHLayer`` with four ``SMASHseq`` ops.

    Args:
        n_in: input channel count; used by the even-indexed ops.
        n_out: output channel count of every op; also the input channel
            count of the odd-indexed ops.
        batchnorm: forwarded to every ``SMASHseq``.
        preactivation: forwarded to every ``SMASHseq``.
        gate_style: stored on the instance; not used in this method.
        kernel_size: forwarded to every ``SMASHseq``.
    """
    super(SMASHLayer, self).__init__()
    self.n_out = n_out
    self.n_in = n_in
    self.batchnorm = batchnorm
    self.preactivation = preactivation
    self.gate_style = gate_style

    # may want to make n_in and n_out more dynamic here
    ops = []
    for index in range(4):
        # Ops 0 and 2 take n_in channels; ops 1 and 3 take n_out channels.
        in_channels = n_out if index % 2 else n_in
        ops.append(SMASHseq(n_in=in_channels,
                            n_out=n_out,
                            dilation=1,
                            batchnorm=self.batchnorm,
                            preactivation=self.preactivation,
                            kernel_size=kernel_size))
    self.op = nn.ModuleList(ops)
    # Op represents the op definition, gate whether to use tanh-sig mult gates,
    # dilation the individual dilation factors, and NL the particular
    # activation to use at each ungated conv.
    # Groups is currently unactivated, we'd need to make sure we slice differently
    # if using variable group.
neural_combinatorial_rl.py 文件源码
项目:neural-combinatorial-rl-pytorch
作者: pemami4911
项目源码
文件源码
阅读 26
收藏 0
点赞 0
评论 0
def forward(self, query, ref):
    """Score every encoder position against the current decoder state.

    Args:
        query: is the hidden state of the decoder at the current
            time step. batch x dim
        ref: the set of hidden states from the encoder.
            sourceL x batch x hidden_dim

    Returns:
        ``(e, logits)``: the projected references
        (batch x hidden_dim x sourceL) and one logit per source position
        (batch x sourceL).
    """
    # Bring ref to [batch_size x hidden_dim x sourceL] for the projection.
    projected_ref = self.project_ref(ref.permute(1, 2, 0))
    source_len = projected_ref.size(2)

    # batch x dim x 1, then tiled along the source axis -> batch x dim x sourceL
    projected_query = self.project_query(query).unsqueeze(2)
    tiled_query = projected_query.repeat(1, 1, source_len)

    # One copy of v per batch element: batch x 1 x hidden_dim
    batch = tiled_query.size(0)
    v_batched = self.v.unsqueeze(0).expand(batch, len(self.v)).unsqueeze(1)

    # [batch_size x 1 x hidden_dim] * [batch_size x hidden_dim x sourceL]
    scores = torch.bmm(v_batched, self.tanh(tiled_query + projected_ref)).squeeze(1)

    # Optionally squash the logits into (-C, C).
    logits = self.C * self.tanh(scores) if self.use_tanh else scores
    return projected_ref, logits
def _step(self, H_t, T_t, C_t, h0, h_mask, t_mask, c_mask):
s_lm1, rnns = h0, [self.rnn_h, self.rnn_t, self.rnn_c]
for l, (rnn_h, rnn_t, rnn_c) in enumerate(zip(*rnns)):
s_lm1_H = h_mask.expand_as(s_lm1) * s_lm1
s_lm1_T = t_mask.expand_as(s_lm1) * s_lm1
s_lm1_C = c_mask.expand_as(s_lm1) * s_lm1
if l == 0:
H_t = F.tanh(H_t + rnn_h(s_lm1_H))
T_t = F.sigmoid(T_t + rnn_t(s_lm1_T))
C_t = F.sigmoid(C_t + rnn_t(s_lm1_C))
else:
H_t = F.tanh(rnn_h(s_lm1_H))
T_t = F.sigmoid(rnn_t(s_lm1_T))
C_t = F.sigmoid(rnn_t(s_lm1_C))
s_l = H_t * T_t + s_lm1 * C_t
s_lm1 = s_l
return s_l
def forward(self, dec_out, enc_outs, enc_att=None, mask=None):
    """
    Attend over the encoder outputs and combine the context with ``dec_out``.

    Parameters:
    -----------

    - dec_out: torch.Tensor(batch_size x hid_dim)
    - enc_outs: torch.Tensor(seq_len x batch_size x hid_dim)
    - enc_att: (optional), torch.Tensor(seq_len x batch_size x att_dim)
    - mask: (optional), bool or byte tensor (batch_size x seq_len); positions
        equal to zero are excluded from the attention.

    Returns:
    --------

    - context: tanh of ``self.linear_out`` applied to ``[context, dec_out]``
    - weights: attention weights (batch x seq_len)
    """
    # (batch x seq_len)
    weights = self.scorer(dec_out, enc_outs, enc_att=enc_att)

    if mask is not None:
        # ``mask == 0`` yields a boolean mask for both bool and byte inputs;
        # ``1 - mask`` is rejected for bool tensors. Out-of-place so the fill
        # is tracked by autograd instead of mutating ``.data``.
        weights = weights.masked_fill(mask == 0, -float('inf'))

    weights = F.softmax(weights, dim=1)

    # (eq 7)
    context = weights.unsqueeze(1).bmm(enc_outs.transpose(0, 1)).squeeze(1)
    # (eq 5) linear out combining context and hidden
    context = torch.tanh(self.linear_out(torch.cat([context, dec_out], 1)))

    return context, weights
def forward(self, x):
    """
    A model for non-linear data that works off of mixing multiple Gaussian
    distributions together. Uses linear projections of a given input to generate
    a set of N Gaussian models' mixture components, means and standard deviations.

    :param x: (num. samples, input dim.)
    :return: Mixture components, means, and standard deviations
        in the form (num. samples, num. mixtures)
    """
    x = torch.tanh(self.projection(x))

    # Normalise over the mixture axis (the last one). For the documented 2-D
    # input this is the axis the deprecated implicit-dim softmax used.
    weights = F.softmax(self.weights_projection(x), dim=-1)
    means = self.mean_projection(x)
    # exp keeps the standard deviations strictly positive.
    stds = torch.exp(self.std_projection(x))
    return weights, means, stds
def forward(self, x):
    """
    Conditional Image Generation with PixelCNN Decoders
    http://arxiv.org/abs/1606.05328

    1D gated activation unit that models the forget gates and
    real gates of an activation unit using convolutions.

    :param x: (batch size, # channels, height)
    :return: tanh(conv(Wr, x)) * sigmoid(conv(Wf, x))
    """
    # ``self.weights`` stores both kernels back to back along dim 2; each
    # slice is ``self.kernel_size`` wide.
    real_kernel, forget_kernel = [
        part.contiguous() for part in self.weights.split(self.kernel_size, dim=2)
    ]

    real_response = F.conv1d(input=x, weight=real_kernel, stride=1)
    forget_response = F.conv1d(input=x, weight=forget_kernel, stride=1)

    return F.tanh(real_response) * F.sigmoid(forget_response)
def forward(self, *hidden_states):
    """Re-weight one or two hidden states with softmax(tanh(projection)) gates.

    With one state, returns the gated state. With two states the behaviour
    depends on ``self.mode``:

    * ``0``: both sides are gated through the shared ``self.projection``.
    * ``1``: the left side uses ``self.left_projection`` and the right side
      ``self.right_projection``.
    * ``2``: one set of weights is computed from the two states concatenated
      along dim 1 and applied to both.

    Any other number of states or mode value falls through and returns
    ``None``.
    """

    def _weights(projection, state):
        # NOTE(review): softmax is called without ``dim`` exactly as in the
        # original, so the normalised axis follows PyTorch's legacy implicit
        # rule. The expected rank is not visible here; confirm before pinning.
        return F.softmax(F.tanh(projection(state)))

    count = len(hidden_states)
    if count == 1:
        only_state = hidden_states[0]
        return _weights(self.projection, only_state) * only_state

    if count != 2:
        return None

    left_hidden_state, right_hidden_state = hidden_states

    if self.mode == 0:
        left_weights = _weights(self.projection, left_hidden_state)
        right_weights = _weights(self.projection, right_hidden_state)
        return left_weights * left_hidden_state, right_weights * right_hidden_state

    if self.mode == 1:
        left_weights = _weights(self.left_projection, left_hidden_state)
        right_weights = _weights(self.right_projection, right_hidden_state)
        return left_weights * left_hidden_state, right_weights * right_hidden_state

    if self.mode == 2:
        joined = torch.cat([left_hidden_state, right_hidden_state], dim=1)
        shared_weights = _weights(self.projection, joined)
        return shared_weights * left_hidden_state, shared_weights * right_hidden_state

    return None
def forward(self, last_state, states, mask=None):
    """Compute attention weights of ``last_state`` over a sequence of states.

    Args:
        last_state: query state, ``(batch, hidden)``; broadcast along the
            sequence axis.
        states: ``(sequence_length, batch, hidden)`` states to attend over.
        mask: optional tensor broadcastable to the energies; positions equal
            to zero receive a large negative energy (``-10000``).

    Returns:
        The softmax-normalised energies.

    Raises:
        ValueError: if ``self.mode`` is not ``"dot"``, ``"general"`` or
            ``"concat"``.
    """
    sequence_length, batch_size, hidden_dim = states.size()
    last_state = last_state.unsqueeze(0).expand(sequence_length, batch_size, last_state.size(1))

    if self.mode == "dot":
        energies = last_state * states
        energies = energies.sum(dim=2).squeeze()
    elif self.mode == "general":
        expanded_projection = self.projection.expand(sequence_length, *self.projection.size())
        energies = last_state * states.bmm(expanded_projection)
        energies = energies.sum(dim=2).squeeze()
    elif self.mode == "concat":
        expanded_reduction = self.reduction.expand(sequence_length, *self.reduction.size())
        expanded_projection = self.projection.expand(sequence_length, *self.projection.size())
        energies = torch.tanh(torch.cat([last_state, states], dim=2).bmm(expanded_reduction))
        energies = energies.bmm(expanded_projection).squeeze()
    else:
        # Previously surfaced as an UnboundLocalError on ``energies``.
        raise ValueError("unsupported attention mode: {!r}".format(self.mode))

    # ``type(mask) == torch.autograd.Variable`` is False for plain tensors on
    # PyTorch >= 0.4, which silently disabled masking; test for presence.
    if mask is not None:
        energies = energies + ((mask == 0).float() * -10000)

    # NOTE(review): ``squeeze()`` above and this implicit-dim softmax are kept
    # as in the original. For 2-D energies the legacy rule normalises over
    # dim 1 (the batch axis here), and ``squeeze()`` changes the rank when
    # the batch or sequence size is 1. Confirm the intended axis with the caller.
    attention_weights = F.softmax(energies)
    return attention_weights
def forward(self, input, hx):
    """Run one LSTM step with gradient sparsification on the pre-activations.

    Args:
        input: step input, projected with ``self.weight_ih`` and ``self.bias``.
        hx: ``(h, c)`` tuple of previous hidden and cell state.

    Returns:
        ``(h, c)``: the next hidden and cell state.
    """
    prev_h, prev_c = hx
    size = self.hidden_size

    # Joint pre-activation of all four gates, laid out along dim 1 as
    # [input | forget | candidate | output].
    gates = F.linear(input, self.weight_ih, self.bias) + F.linear(prev_h, self.weight_hh)
    gates = sparsify_grad(gates, self.k, self.simplified)
    if self.grad_clip:
        gates = clip_grad(gates, -self.grad_clip, self.grad_clip)

    in_gate = F.sigmoid(gates[:, :size])
    forget_gate = F.sigmoid(gates[:, size:size * 2])
    candidate = F.tanh(gates[:, size * 2:size * 3])
    out_gate = F.sigmoid(gates[:, size * 3:])

    next_c = forget_gate * prev_c + in_gate * candidate
    next_h = out_gate * F.tanh(next_c)
    return next_h, next_c