def forward(self, x):
    """Run four conv/pool/ELU stages followed by a two-layer classifier head.

    Every stage max-pools with a window of 2 and then applies ELU; batch
    norm is applied after ``conv2``, ``conv3`` and ``conv4`` but not after
    ``conv1``. The feature map is then reshaped to rows of 750 values.

    Args:
        x: Input passed to ``self.conv1``.

    Returns:
        Log-probabilities of the ``fc2`` output, normalised over dimension 1.
    """
    x = F.elu(F.max_pool2d(self.conv1(x), 2))
    x = F.elu(F.max_pool2d(self.bn2(self.conv2(x)), 2))
    x = F.elu(F.max_pool2d(self.bn3(self.conv3(x)), 2))
    x = F.elu(F.max_pool2d(self.bn4(self.conv4(x)), 2))
    # Flatten to rows of 750 features (hard-coded; must match what fc1 accepts).
    x = x.view(-1, 750)
    x = F.relu(self.fc1(x))
    # Dropout is only active while the module is in training mode.
    x = F.dropout(x, training=self.training)
    x = self.fc2(x)
    # Name the normalised dimension explicitly: the implicit-dim form of
    # log_softmax is deprecated. x is 2-D here because of the view above.
    return F.log_softmax(x, dim=1)
# Example source code using Python's elu()
def forward(self, input):
    """Run the conv/LSTM/conv pipeline and emit begin and end log-probabilities.

    Side effect: ``self.hidden_state`` and ``self.cell_state`` are replaced by
    the state returned from ``self.lstm``.

    Args:
        input: Tensor passed to ``self.conv1``.

    Returns:
        Tuple ``(o_begin, o_end)``. Each is the output of ``begin_conv`` /
        ``end_conv`` flattened to ``(size(0), -1)`` and passed through
        ``log_softmax`` over dimension 1.
    """
    # TODO perhaps add batch normalization or layer normalization
    x = F.elu(self.conv1(input))
    x = F.elu(self.conv2(x))
    x = F.elu(self.conv3(x))

    # Next flatten the output to be batched into LSTM layers
    # The shape of x is batch_size, channels, height, width
    x = self.pre_lstm_bn(x)

    # Move dimension 1 to the end: (d0, d1, d2, d3) -> (d0, d2, d3, d1).
    x = torch.transpose(x, 1, 3)
    x = torch.transpose(x, 1, 2)
    # view() needs contiguous memory after the transposes.
    x = x.contiguous()
    x = x.view(x.size(0), self.batch, self.hidden_dim)

    x, hidden = self.lstm(x, (self.hidden_state, self.cell_state))
    self.hidden_state, self.cell_state = hidden

    # Undo the flattening so the remaining convolutions see a 4-D tensor again.
    x = torch.transpose(x, 2, 1)
    x = x.contiguous()
    x = x.view(x.size(0), self.hidden_dim, self.height, self.width)

    x = self.lstm_batch_norm(x)
    x = F.elu(self.conv4(x))
    x = F.elu(self.conv5(x))

    o_begin = self.begin_conv(x)
    o_end = self.end_conv(x)
    o_begin = o_begin.view(o_begin.size(0), -1)
    o_end = o_end.view(o_end.size(0), -1)

    # Both tensors are 2-D after the views above; name the normalised dimension
    # explicitly because the implicit-dim form of log_softmax is deprecated.
    o_begin = F.log_softmax(o_begin, dim=1)
    o_end = F.log_softmax(o_end, dim=1)
    return o_begin, o_end
def forward(self, input):
    """Run the conv/LSTM/conv pipeline and emit a value output plus move logits.

    Side effect: ``self.hidden_state`` and ``self.cell_state`` are replaced by
    the state returned from ``self.lstm``.

    Args:
        input: Tensor passed to ``self.conv1``.

    Returns:
        Tuple ``(val, logit)``: ``val`` is ``value_linear`` applied to the
        max-pooled ``value_conv`` features; ``logit`` is the ``move_conv``
        output flattened to ``(size(0), -1)``.
    """
    x = F.elu(self.conv1(input))
    x = F.elu(self.conv2(x))
    x = F.elu(self.conv3(x))

    # Next flatten the output to be batched into LSTM layers
    # The shape of x is batch_size, channels, height, width
    x = self.pre_lstm_bn(x)

    # Move dimension 1 to the end: (d0, d1, d2, d3) -> (d0, d2, d3, d1).
    x = torch.transpose(x, 1, 3)
    x = torch.transpose(x, 1, 2)
    # view() needs contiguous memory after the transposes.
    x = x.contiguous()
    x = x.view(x.size(0), self.batch, self.hidden_dim)

    x, hidden = self.lstm(x, (self.hidden_state, self.cell_state))
    self.hidden_state, self.cell_state = hidden

    # Undo the flattening so the remaining convolutions see a 4-D tensor again.
    x = torch.transpose(x, 2, 1)
    x = x.contiguous()
    x = x.view(x.size(0), self.hidden_dim, self.height, self.width)

    x = self.lstm_batch_norm(x)
    x = F.elu(self.conv4(x))
    x = F.elu(self.conv5(x))

    logit = self.move_conv(x)
    logit = logit.view(logit.size(0), -1)

    x = self.value_conv(x)
    x = x.view(x.size(0), self.hidden_dim, self.batch)
    # Pool across the entire last dimension, which leaves it with size 1.
    x = F.max_pool1d(x, self.batch)
    # Remove only the pooled dimension. A bare squeeze() would also drop the
    # leading dimension whenever it has size 1, changing the rank of `val`.
    x = x.squeeze(-1)
    val = self.value_linear(x)
    return val, logit
def forward(self, x):
    """Apply batch norm, in-place ELU and the convolution, then optional dropout.

    ``self.drop`` is applied only when ``self.dropout`` is not ``None``.
    """
    activated = F.elu(self.bn(x), inplace=True)
    out = self.conv(activated)
    if self.dropout is None:
        return out
    return self.drop(out)
def selu(x):
    """Return ``scale * x`` where ``x >= 0`` and ``scale * alpha * elu(x)`` elsewhere.

    The element-wise selection is delegated to ``where``.
    """
    selu_alpha = 1.6732632423543772848170429916717
    selu_scale = 1.0507009873554804934193349852946
    negative_branch = selu_alpha * F.elu(x)
    non_negative = x >= 0
    # noinspection PyTypeChecker
    return selu_scale * where(non_negative, x, negative_branch)
def selu(x):
    """Scaled ELU: ``F.elu`` with the SELU alpha, multiplied by the SELU scale."""
    alpha, scale = (
        1.6732632423543772848170429916717,
        1.0507009873554804934193349852946,
    )
    activated = F.elu(x, alpha)
    return scale * activated
def __init__(self, in_chans,
             n_classes,
             input_time_length,
             final_conv_length,
             n_filters_time=25,
             n_filters_spat=25,
             filter_time_length=10,
             pool_time_length=3,
             pool_time_stride=3,
             n_filters_2=50,
             filter_length_2=10,
             n_filters_3=100,
             filter_length_3=10,
             n_filters_4=200,
             filter_length_4=10,
             first_nonlin=elu,
             first_pool_mode='max',
             first_pool_nonlin=identity,
             later_nonlin=elu,
             later_pool_mode='max',
             later_pool_nonlin=identity,
             drop_prob=0.5,
             double_time_convs=False,
             split_first_layer=True,
             batch_norm=True,
             batch_norm_alpha=0.1,
             stride_before_pool=False):
    """Record every constructor argument as a same-named instance attribute.

    Nothing else is done here: the arguments are stored verbatim and no
    layers are constructed in this method.
    """
    # With final_conv_length == 'auto', input_time_length must be provided.
    if final_conv_length == 'auto':
        assert input_time_length is not None
    # locals() at this point holds exactly the arguments (plus `self`), so this
    # copies each of them onto the instance. Do not introduce other local
    # variables above this line: they would become attributes as well.
    self.__dict__.update(locals())
    # Remove the self-reference that locals() brought along.
    del self.self
def forward(self, image_pairs: Variable) -> Variable:
    """Map ``image_pairs`` to a sigmoid decision.

    The input goes through ``self.arc``, then ``dense1`` with ELU, then
    ``dense2`` with a sigmoid.
    """
    hidden = F.elu(self.dense1(self.arc(image_pairs)))
    return torch.sigmoid(self.dense2(hidden))
def forward(self, inputs):
    """Run one recurrent actor-critic step.

    Args:
        inputs: Pair ``(observation, (hx, cx))``.

    Returns:
        ``(critic_linear(hx), actor_linear(hx), (hx, cx))`` using the state
        returned by ``self.lstm``.
    """
    observation, (hx, cx) = inputs
    features = F.elu(self.linear1(observation))
    hx, cx = self.lstm(features, (hx, cx))
    value = self.critic_linear(hx)
    policy = self.actor_linear(hx)
    return value, policy, (hx, cx)
def forward(self, x):
    """Encode a 42x42 input with four ELU conv layers and a final ``fc`` layer.

    Asserts that dimensions 2 and 3 of ``x`` are both 42.
    """
    assert x.size(2) == 42 and x.size(3) == 42
    for conv in (self.conv1, self.conv2, self.conv3, self.conv4):
        x = F.elu(conv(x))
    # Flatten everything after the leading dimension.
    flat = x.view(x.size(0), -1)
    return self.fc(flat)
def _get_rnn_output(self, input_word, input_char, mask=None, length=None, hx=None):
# hack length from mask
# we do not hack mask from length for special reasons.
# Thus, always provide mask if it is necessary.
if length is None and mask is not None:
length = mask.data.sum(dim=1).long()
# [batch, length, word_dim]
word = self.word_embedd(input_word)
# [batch, length, char_length, char_dim]
char = self.char_embedd(input_char)
char_size = char.size()
# first transform to [batch *length, char_length, char_dim]
# then transpose to [batch * length, char_dim, char_length]
char = char.view(char_size[0] * char_size[1], char_size[2], char_size[3]).transpose(1, 2)
# put into cnn [batch*length, char_filters, char_length]
# then put into maxpooling [batch * length, char_filters]
char, _ = self.conv1d(char).max(dim=2)
# reshape to [batch, length, char_filters]
char = torch.tanh(char).view(char_size[0], char_size[1], -1)
# concatenate word and char [batch, length, word_dim+char_filter]
input = torch.cat([word, char], dim=2)
# apply dropout
input = self.dropout_in(input)
# prepare packed_sequence
if length is not None:
seq_input, hx, rev_order, mask = utils.prepare_rnn_seq(input, length, hx=hx, masks=mask, batch_first=True)
seq_output, hn = self.rnn(seq_input, hx=hx)
output, hn = utils.recover_rnn_seq(seq_output, rev_order, hx=hn, batch_first=True)
else:
# output from rnn [batch, length, hidden_size]
output, hn = self.rnn(input, hx=hx)
output = self.dropout_rnn(output)
if self.dense is not None:
# [batch, length, tag_space]
output = F.elu(self.dense(output))
return output, hn, mask, length
def _get_rnn_output(self, input_word, input_char, mask=None, length=None, hx=None):
# [batch, length, word_dim]
word = self.word_embedd(input_word)
# [batch, length, char_length, char_dim]
char = self.char_embedd(input_char)
char_size = char.size()
# first transform to [batch *length, char_length, char_dim]
# then transpose to [batch * length, char_dim, char_length]
char = char.view(char_size[0] * char_size[1], char_size[2], char_size[3]).transpose(1, 2)
# put into cnn [batch*length, char_filters, char_length]
# then put into maxpooling [batch * length, char_filters]
char, _ = self.conv1d(char).max(dim=2)
# reshape to [batch, length, char_filters]
char = torch.tanh(char).view(char_size[0], char_size[1], -1)
# concatenate word and char [batch, length, word_dim+char_filter]
input = torch.cat([word, char], dim=2)
# output from rnn [batch, length, hidden_size]
output, hn = self.rnn(input, mask, hx=hx)
# apply dropout for the output of rnn
output = self.dropout_rnn(output.transpose(1, 2)).transpose(1, 2)
if self.dense is not None:
# [batch, length, tag_space]
output = F.elu(self.dense(output))
return output, hn, mask, length
def forward(self, inputs):
    """Run one conv + LSTM actor-critic step.

    Args:
        inputs: Pair ``(frames, (hx, cx))``.

    Returns:
        ``(critic_linear(hx), actor_linear(hx), (hx, cx))`` using the state
        returned by ``self.lstm``.
    """
    features, (hx, cx) = inputs
    for conv in (self.conv1, self.conv2, self.conv3, self.conv4):
        features = F.elu(conv(features))
    # Flatten to rows of 32 * 3 * 3 features for the LSTM.
    flat = features.view(-1, 32 * 3 * 3)
    hx, cx = self.lstm(flat, (hx, cx))
    return self.critic_linear(hx), self.actor_linear(hx), (hx, cx)
def test_elu_inplace_view(self):
    """Gradient-check in-place ELU applied to a narrowed view of a cloned tensor."""
    # gradcheck compares analytical gradients against finite differences and
    # expects double-precision inputs, so the dtype is set explicitly instead
    # of relying on the process-wide default tensor type.
    v = torch.tensor([1.0, -1.0, 1.0, -1.0], dtype=torch.double, requires_grad=True)

    def func(root):
        # Clone first so the in-place op never touches the leaf tensor itself.
        x = root.clone()
        view = x.narrow(0, 1, 2)
        res = F.elu(view, inplace=True)
        # The in-place call must hand back the very same view object.
        self.assertIs(res, view)
        return x

    gradcheck(func, [v])
    gradgradcheck(func, [v])
def test_elu_inplace_gradgrad(self):
    """Gradient-check first and second derivatives of in-place ELU on a clone."""
    # gradcheck compares analytical gradients against finite differences and
    # expects double-precision inputs, so the dtype is set explicitly instead
    # of relying on the process-wide default tensor type.
    v = torch.randn(8, dtype=torch.double, requires_grad=True)

    def func(root):
        # Clone first so the in-place op never touches the leaf tensor itself.
        x = root.clone()
        return F.elu(x, inplace=True)

    gradcheck(func, [v])
    gradgradcheck(func, [v])
def forward(self, inputs):
    """Run one conv + LSTM actor-critic step on ``(frames, (hx, cx))``.

    Returns:
        ``(critic_linear(hx), actor_linear(hx), (hx, cx))`` using the state
        returned by ``self.lstm``.
    """
    frames, (hx, cx) = inputs
    out = F.elu(self.conv1(frames))
    out = F.elu(self.conv2(out))
    out = F.elu(self.conv3(out))
    out = F.elu(self.conv4(out))
    # Flatten to rows of 32 * 2 * 2 features for the LSTM.
    flat = out.view(-1, 32 * 2 * 2)
    hx, cx = self.lstm(flat, (hx, cx))
    value = self.critic_linear(hx)
    logits = self.actor_linear(hx)
    return value, logits, (hx, cx)
def elu(x, alpha=1.):
    """Apply ELU to ``x`` through the ``get_op`` wrapper.

    Args:
        x: Input handed to the op built by ``get_op``.
        alpha: ELU alpha. It is forwarded to ``F.elu`` and also listed in the
            ``arguments`` given to ``get_op``.

    Returns:
        Whatever the op returned by ``get_op`` produces for ``x``.
    """
    def _elu(x, alpha=alpha):
        # Forward alpha to F.elu. It used to be accepted and then dropped, so
        # every call silently ran with F.elu's default alpha.
        return F.elu(x, alpha)
    return get_op(_elu, arguments=[alpha])(x)
def forward(self, inputs):
    """Pass ``inputs`` through three ELU-activated linear layers, then ``actor_linear``."""
    hidden = inputs
    for layer in (self.linear1, self.linear2, self.linear3):
        hidden = F.elu(layer(hidden))
    return self.actor_linear(hidden)
def forward(self, inputs):
    """Encode the frames, advance the LSTM and return value, policy and state.

    Args:
        inputs: Pair ``(frames, (hx, cx))``.

    Returns:
        ``(critic_linear(hx), actor_linear(hx), (hx, cx))`` using the state
        returned by ``self.lstm``.
    """
    frames, state = inputs
    hx, cx = state
    encoded = F.elu(self.conv4(F.elu(self.conv3(F.elu(self.conv2(F.elu(self.conv1(frames))))))))
    # Flatten to rows of 32 * 3 * 3 features for the LSTM.
    hx, cx = self.lstm(encoded.view(-1, 32 * 3 * 3), (hx, cx))
    value = self.critic_linear(hx)
    policy = self.actor_linear(hx)
    return value, policy, (hx, cx)
def selu(x, inplace=False):
    """Compute SELU as the sum of a scaled ReLU part and a scaled ELU part.

    ``inplace`` is accepted but not used; the result is always a new tensor.
    """
    alpha = 1.6732632423543772848170429916717
    scale = 1.0507009873554804934193349852946
    # Non-negative inputs: plain scaled ReLU.
    positive_part = scale * F.relu(x)
    # Clamp to the non-positive half (zero for x >= 0) before applying ELU.
    clipped_negative = -1 * F.relu(-1 * x)
    negative_part = scale * alpha * F.elu(clipped_negative)
    return positive_part + negative_part
def selu(x):
    """Return ``scale * elu(x, alpha)`` with the fixed SELU constants."""
    selu_alpha = 1.6732632423543772848170429916717
    selu_scale = 1.0507009873554804934193349852946
    elu_out = F.elu(x, selu_alpha)
    return selu_scale * elu_out
def _get_rnn_output(self, input_word, input_char, input_pos, mask=None, length=None, hx=None):
# [batch, length, word_dim]
word = self.word_embedd(input_word)
# [batch, length, pos_dim]
pos = self.pos_embedd(input_pos)
# [batch, length, char_length, char_dim]
char = self.char_embedd(input_char)
char_size = char.size()
# first transform to [batch *length, char_length, char_dim]
# then transpose to [batch * length, char_dim, char_length]
char = char.view(char_size[0] * char_size[1], char_size[2], char_size[3]).transpose(1, 2)
# put into cnn [batch*length, char_filters, char_length]
# then put into maxpooling [batch * length, char_filters]
char, _ = self.conv1d(char).max(dim=2)
# reshape to [batch, length, char_filters]
char = torch.tanh(char).view(char_size[0], char_size[1], -1)
# apply dropout on input
word = self.dropout_in(word)
pos = self.dropout_in(pos)
char = self.dropout_in(char)
# concatenate word and char [batch, length, word_dim+char_filter]
input = torch.cat([word, char, pos], dim=2)
# output from rnn [batch, length, hidden_size]
output, hn = self.rnn(input, mask, hx=hx)
# apply dropout for output
# [batch, length, hidden_size] --> [batch, hidden_size, length] --> [batch, length, hidden_size]
output = self.dropout_out(output.transpose(1, 2)).transpose(1, 2)
# output size [batch, length, arc_space]
arc_h = F.elu(self.arc_h(output))
arc_c = F.elu(self.arc_c(output))
# output size [batch, length, type_space]
type_h = F.elu(self.type_h(output))
type_c = F.elu(self.type_c(output))
# apply dropout
# [batch, length, dim] --> [batch, 2 * length, dim]
arc = torch.cat([arc_h, arc_c], dim=1)
type = torch.cat([type_h, type_c], dim=1)
arc = self.dropout_out(arc.transpose(1, 2)).transpose(1, 2)
arc_h, arc_c = arc.chunk(2, 1)
type = self.dropout_out(type.transpose(1, 2)).transpose(1, 2)
type_h, type_c = type.chunk(2, 1)
type_h = type_h.contiguous()
type_c = type_c.contiguous()
return (arc_h, arc_c), (type_h, type_c), hn, mask, length
def decode(self, input_word, input_char, input_pos, mask=None, length=None, hx=None, beam=1, leading_symbolic=0, ordered=True):
    """Decode every sentence of the batch and collect the results in int32 arrays.

    Each sentence is decoded with ``_decode_per_sentence`` using the given
    ``ordered`` flag first; if that returns ``None`` it is decoded again with
    ``ordered=False``.

    Returns:
        Tuple ``(heads, types, children, stack_types)`` of numpy int32 arrays.
        ``heads`` and ``types`` are ``[batch, max_len_e]``; ``children`` and
        ``stack_types`` are ``[batch, 2 * max_len_e - 1]``. Entries beyond a
        sentence's decoded length stay zero.
    """
    # reset noise for decoder
    self.decoder.reset_noise(0)

    # output from encoder [batch, length_encoder, tag_space]
    # src_encoding [batch, length, input_size]
    # hn [num_direction, batch, hidden_size]
    src_encoding, output_enc, hn, mask, length = self._get_encoder_output(
        input_word, input_char, input_pos, mask_e=mask, length_e=length, hx=hx)
    # [batch, length_encoder, arc_space]
    arc_c = F.elu(self.arc_c(output_enc))
    # [batch, length_encoder, type_space]
    type_c = F.elu(self.type_c(output_enc))

    hn = self._transform_decoder_init_state(hn)
    batch, max_len_e, _ = src_encoding.size()
    stack_len = 2 * max_len_e - 1

    heads = np.zeros([batch, max_len_e], dtype=np.int32)
    types = np.zeros([batch, max_len_e], dtype=np.int32)
    children = np.zeros([batch, stack_len], dtype=np.int32)
    stack_types = np.zeros([batch, stack_len], dtype=np.int32)

    for b in range(batch):
        sent_len = length[b] if length is not None else None

        # An LSTM state is an (h, c) tuple; slice out sentence b from each part.
        if isinstance(hn, tuple):
            h_all, c_all = hn
            hx = (h_all[:, b, :].contiguous(), c_all[:, b, :].contiguous())
        else:
            hx = hn[:, b, :].contiguous()

        # First attempt honours `ordered`; the fallback always uses False.
        for use_order in (ordered, False):
            preds = self._decode_per_sentence(src_encoding[b], output_enc[b], arc_c[b], type_c[b], hx, sent_len, beam, use_order, leading_symbolic)
            if preds is not None:
                break

        hids, tids, sent_len, chids, stids = preds
        heads[b, :sent_len] = hids
        types[b, :sent_len] = tids
        children[b, :2 * sent_len - 1] = chids
        stack_types[b, :2 * sent_len - 1] = stids

    return heads, types, children, stack_types