def forward(self, x, hint):
    # Encoder path; the hint tensor is projected and concatenated at the x2 level.
    v = self.toH(hint)
    x0 = self.to0(x)
    x1 = self.to1(x0)
    x2 = self.to2(x1)
    x3 = self.to3(torch.cat([x2, v], 1))
    x4 = self.to4(x3)
    # Decoder path; skip connections are detached so decoder gradients do not flow back into the encoder features.
    x = self.tunnel4(x4)
    x = self.tunnel3(torch.cat([x, x3.detach()], 1))
    x = self.tunnel2(torch.cat([x, x2.detach()], 1))
    x = self.tunnel1(torch.cat([x, x1.detach()], 1))
    x = F.tanh(self.exit(torch.cat([x, x0.detach()], 1)))
    return x
Python tanh() usage examples (source code)
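All of the snippets below call tanh through torch.nn.functional (F.tanh). As a minimal, self-contained sketch of that call (tensor values here are illustrative only), note that torch.tanh is the generally recommended spelling and that some PyTorch releases emit a deprecation warning for the functional form:

import torch
import torch.nn.functional as F

x = torch.linspace(-3.0, 3.0, steps=7)
y_functional = F.tanh(x)      # works; some releases warn that this is deprecated
y_preferred = torch.tanh(x)   # the usual spelling in current PyTorch
print(torch.allclose(y_functional, y_preferred))  # True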
def forward(self, input, VGG):
    x1 = F.leaky_relu(self.down1(input), 0.2, True)
    x2 = F.leaky_relu(self.down2(x1), 0.2, True)
    x3 = F.leaky_relu(self.down3(x2), 0.2, True)
    x4 = F.leaky_relu(self.down4(x3), 0.2, True)
    x5 = F.leaky_relu(self.down5(x4), 0.2, True)
    x6 = F.leaky_relu(self.down6(x5), 0.2, True)
    x7 = F.leaky_relu(self.down7(x6), 0.2, True)
    x8 = F.relu(self.down8(x7), True)
    VGG = F.relu(self.linear(VGG), True)
    x = F.relu(self.up8(torch.cat([x8, VGG.view(-1, 2048, 1, 1)], 1)), True)
    x = F.relu(self.up7(torch.cat([x, x7], 1)), True)
    x = F.relu(self.up6(torch.cat([x, x6], 1)), True)
    x = F.relu(self.up5(torch.cat([x, x5], 1)), True)
    x = F.relu(self.up4(torch.cat([x, x4], 1)), True)
    x = F.relu(self.up3(torch.cat([x, x3], 1)), True)
    x = F.relu(self.up2(torch.cat([x, x2], 1)), True)
    x = F.tanh(self.up1(torch.cat([x, x1], 1)))
    return x
def node_forward(self, inputs, child_c, child_h):
    child_h_sum = torch.sum(child_h, dim=0, keepdim=True)
    iou = self.ioux(inputs) + self.iouh(child_h_sum)
    i, o, u = torch.split(iou, iou.size(1) // 3, dim=1)
    i, o, u = F.sigmoid(i), F.sigmoid(o), F.tanh(u)
    f = F.sigmoid(
        self.fh(child_h) +
        self.fx(inputs).repeat(len(child_h), 1)
    )
    fc = torch.mul(f, child_c)
    c = torch.mul(i, u) + torch.sum(fc, dim=0, keepdim=True)
    h = torch.mul(o, F.tanh(c))
    return c, h
def forward(self, input, source_hids):
    # input: bsz x input_embed_dim
    # source_hids: srclen x bsz x output_embed_dim
    # x: bsz x output_embed_dim
    x = self.input_proj(input)
    # compute attention
    attn_scores = (source_hids * x.unsqueeze(0)).sum(dim=2)
    attn_scores = F.softmax(attn_scores.t()).t()  # srclen x bsz
    # sum weighted sources
    x = (attn_scores.unsqueeze(2) * source_hids).sum(dim=0)
    x = F.tanh(self.output_proj(torch.cat((x, input), dim=1)))
    return x, attn_scores
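The shape comments above imply two projection layers around this forward pass. A hedged, standalone sketch of the same computation with explicit modules (layer sizes and the bias choice are assumptions inferred from the comments, not taken from the original project):

import torch
import torch.nn as nn

input_embed_dim, output_embed_dim = 256, 512        # assumed sizes
input_proj = nn.Linear(input_embed_dim, output_embed_dim, bias=False)
output_proj = nn.Linear(input_embed_dim + output_embed_dim, output_embed_dim, bias=False)

bsz, srclen = 8, 20
decoder_state = torch.randn(bsz, input_embed_dim)           # plays the role of "input"
source_hids = torch.randn(srclen, bsz, output_embed_dim)    # encoder outputs
x = input_proj(decoder_state)                               # bsz x output_embed_dim
scores = torch.softmax((source_hids * x.unsqueeze(0)).sum(dim=2), dim=0)  # softmax over srclen
context = (scores.unsqueeze(2) * source_hids).sum(dim=0)    # bsz x output_embed_dim
out = torch.tanh(output_proj(torch.cat((context, decoder_state), dim=1)))
print(out.shape, scores.shape)                              # (8, 512) and (20, 8)

Softmaxing over dim=0 of the srclen x bsz score matrix is equivalent to the transpose-softmax-transpose dance in the original.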
def forward(self, x):
    en0 = self.c0(x)
    en1 = self.bnc1(self.c1(F.leaky_relu(en0, negative_slope=0.2)))
    en2 = self.bnc2(self.c2(F.leaky_relu(en1, negative_slope=0.2)))
    en3 = self.bnc3(self.c3(F.leaky_relu(en2, negative_slope=0.2)))
    en4 = self.bnc4(self.c4(F.leaky_relu(en3, negative_slope=0.2)))
    en5 = self.bnc5(self.c5(F.leaky_relu(en4, negative_slope=0.2)))
    en6 = self.bnc6(self.c6(F.leaky_relu(en5, negative_slope=0.2)))
    en7 = self.c7(F.leaky_relu(en6, negative_slope=0.2))
    de7 = self.bnd7(self.d7(F.relu(en7)))
    de6 = F.dropout(self.bnd6(self.d6(F.relu(torch.cat((en6, de7), 1)))))
    de5 = F.dropout(self.bnd5(self.d5(F.relu(torch.cat((en5, de6), 1)))))
    de4 = F.dropout(self.bnd4(self.d4(F.relu(torch.cat((en4, de5), 1)))))
    de3 = self.bnd3(self.d3(F.relu(torch.cat((en3, de4), 1))))
    de2 = self.bnd2(self.d2(F.relu(torch.cat((en2, de3), 1))))
    de1 = self.bnd1(self.d1(F.relu(torch.cat((en1, de2), 1))))
    de0 = F.tanh(self.d0(F.relu(torch.cat((en0, de1), 1))))
    return de0
def forward(self, inputs, states, masks):
    x = self.v_fc1(inputs)
    x = F.tanh(x)
    x = self.v_fc2(x)
    x = F.tanh(x)
    x = self.v_fc3(x)
    value = x
    x = self.a_fc1(inputs)
    x = F.tanh(x)
    x = self.a_fc2(x)
    x = F.tanh(x)
    return value, x, states
model_CNN.py — project: cnn-lstm-bilstm-deepcnn-clstm-in-pytorch, author: bamtercelboo
def forward(self, x):
    x = self.embed(x)  # (N,W,D)
    x = self.dropout_embed(x)
    x = x.unsqueeze(1)  # (N,Ci,W,D)
    if self.args.batch_normalizations is True:
        x = [self.convs1_bn(F.tanh(conv(x))).squeeze(3) for conv in self.convs1]  # [(N,Co,W), ...]*len(Ks)
        x = [F.max_pool1d(i, i.size(2)).squeeze(2) for i in x]  # [(N,Co), ...]*len(Ks)
    else:
        # x = [self.dropout(F.relu(conv(x)).squeeze(3)) for conv in self.convs1]  # [(N,Co,W), ...]*len(Ks)
        # x = [self.dropout(F.tanh(conv(x)).squeeze(3)) for conv in self.convs1]  # [(N,Co,W), ...]*len(Ks)
        x = [F.relu(conv(x)).squeeze(3) for conv in self.convs1]  # [(N,Co,W), ...]*len(Ks)
        # x = [F.tanh(conv(x)).squeeze(3) for conv in self.convs1]  # [(N,Co,W), ...]*len(Ks)
        x = [F.max_pool1d(i, i.size(2)).squeeze(2) for i in x]  # [(N,Co), ...]*len(Ks)
    x = torch.cat(x, 1)
    x = self.dropout(x)  # (N,len(Ks)*Co)
    if self.args.batch_normalizations is True:
        x = self.fc1_bn(self.fc1(x))
        logit = self.fc2_bn(self.fc2(F.tanh(x)))
    else:
        logit = self.fc(x)
    return logit
model_GRU.py — project: cnn-lstm-bilstm-deepcnn-clstm-in-pytorch, author: bamtercelboo
def forward(self, input):
    self.hidden = self.init_hidden(self.num_layers, input.size(1))
    embed = self.embed(input)
    input = embed.view(len(input), embed.size(1), -1)
    # gru
    # print(input)
    # print("a", self.hidden)
    lstm_out, hidden = self.gru(input, self.hidden)
    lstm_out = torch.transpose(lstm_out, 0, 1)
    lstm_out = torch.transpose(lstm_out, 1, 2)
    # pooling
    lstm_out = F.max_pool1d(lstm_out, lstm_out.size(2)).squeeze(2)
    lstm_out = F.tanh(lstm_out)
    # linear
    y = self.hidden2label(lstm_out)
    logit = y
    return logit
model_BiLSTM_1.py — project: cnn-lstm-bilstm-deepcnn-clstm-in-pytorch, author: bamtercelboo
def forward(self, x):
    x = self.embed(x)
    x = self.dropout_embed(x)
    # x = x.view(len(x), x.size(1), -1)
    # x = embed.view(len(x), embed.size(1), -1)
    bilstm_out, self.hidden = self.bilstm(x, self.hidden)
    # print(self.hidden)
    bilstm_out = torch.transpose(bilstm_out, 0, 1)
    bilstm_out = torch.transpose(bilstm_out, 1, 2)
    bilstm_out = F.tanh(bilstm_out)
    bilstm_out = F.max_pool1d(bilstm_out, bilstm_out.size(2)).squeeze(2)
    bilstm_out = F.tanh(bilstm_out)
    # bilstm_out = self.dropout(bilstm_out)
    # bilstm_out = self.hidden2label1(bilstm_out)
    # logit = self.hidden2label2(F.tanh(bilstm_out))
    logit = self.hidden2label(bilstm_out)
    return logit
model_CLSTM.py — project: cnn-lstm-bilstm-deepcnn-clstm-in-pytorch, author: bamtercelboo
def forward(self, x):
    embed = self.embed(x)
    # CNN
    cnn_x = embed
    cnn_x = self.dropout(cnn_x)
    cnn_x = cnn_x.unsqueeze(1)
    cnn_x = [F.relu(conv(cnn_x)).squeeze(3) for conv in self.convs1]  # [(N,Co,W), ...]*len(Ks)
    cnn_x = torch.cat(cnn_x, 0)
    cnn_x = torch.transpose(cnn_x, 1, 2)
    # LSTM
    lstm_out, self.hidden = self.lstm(cnn_x, self.hidden)
    lstm_out = torch.transpose(lstm_out, 0, 1)
    lstm_out = torch.transpose(lstm_out, 1, 2)
    lstm_out = F.max_pool1d(lstm_out, lstm_out.size(2)).squeeze(2)
    # linear
    cnn_lstm_out = self.hidden2label1(F.tanh(lstm_out))
    cnn_lstm_out = self.hidden2label2(F.tanh(cnn_lstm_out))
    # output
    logit = cnn_lstm_out
    return logit
model_CBiLSTM.py — project: cnn-lstm-bilstm-deepcnn-clstm-in-pytorch, author: bamtercelboo
def forward(self, x):
    embed = self.embed(x)
    # CNN
    embed = self.dropout(embed)
    cnn_x = embed
    cnn_x = cnn_x.unsqueeze(1)
    cnn_x = [F.relu(conv(cnn_x)).squeeze(3) for conv in self.convs1]  # [(N,Co,W), ...]*len(Ks)
    cnn_x = torch.cat(cnn_x, 0)
    cnn_x = torch.transpose(cnn_x, 1, 2)
    # BiLSTM
    bilstm_out, self.hidden = self.bilstm(cnn_x, self.hidden)
    bilstm_out = torch.transpose(bilstm_out, 0, 1)
    bilstm_out = torch.transpose(bilstm_out, 1, 2)
    bilstm_out = F.max_pool1d(bilstm_out, bilstm_out.size(2)).squeeze(2)
    # linear
    cnn_bilstm_out = self.hidden2label1(F.tanh(bilstm_out))
    cnn_bilstm_out = self.hidden2label2(F.tanh(cnn_bilstm_out))
    # dropout
    logit = self.dropout(cnn_bilstm_out)
    return logit
model_BiGRU.py — project: cnn-lstm-bilstm-deepcnn-clstm-in-pytorch, author: bamtercelboo
def forward(self, input):
    embed = self.embed(input)
    embed = self.dropout(embed)  # original author's note: adding this dropout reduces the accuracy
    input = embed.view(len(input), embed.size(1), -1)
    # gru
    gru_out, hidden = self.bigru(input, self.hidden)
    gru_out = torch.transpose(gru_out, 0, 1)
    gru_out = torch.transpose(gru_out, 1, 2)
    # pooling
    # gru_out = F.tanh(gru_out)
    gru_out = F.max_pool1d(gru_out, gru_out.size(2)).squeeze(2)
    gru_out = F.tanh(gru_out)
    # linear
    y = self.hidden2label(gru_out)
    logit = y
    return logit
model_CGRU.py — project: cnn-lstm-bilstm-deepcnn-clstm-in-pytorch, author: bamtercelboo
def forward(self, x):
    embed = self.embed(x)
    # CNN
    cnn_x = embed
    cnn_x = self.dropout(cnn_x)
    cnn_x = cnn_x.unsqueeze(1)
    cnn_x = [F.relu(conv(cnn_x)).squeeze(3) for conv in self.convs1]  # [(N,Co,W), ...]*len(Ks)
    cnn_x = torch.cat(cnn_x, 0)
    cnn_x = torch.transpose(cnn_x, 1, 2)
    # GRU
    lstm_out, self.hidden = self.gru(cnn_x, self.hidden)
    lstm_out = torch.transpose(lstm_out, 0, 1)
    lstm_out = torch.transpose(lstm_out, 1, 2)
    lstm_out = F.max_pool1d(lstm_out, lstm_out.size(2)).squeeze(2)
    # linear
    cnn_lstm_out = self.hidden2label1(F.tanh(lstm_out))
    cnn_lstm_out = self.hidden2label2(F.tanh(cnn_lstm_out))
    # output
    logit = cnn_lstm_out
    return logit
def forward(self, v, u):
    """
    Input:
    - v: N x D x H x W
    - u: N x D
    Returns:
    - next_u: N x D
    """
    N, K = v.size(0), self.hidden_dim
    D, H, W = v.size(1), v.size(2), v.size(3)
    v_proj = self.Wv(v)  # N x K x H x W
    u_proj = self.Wu(u)  # N x K
    u_proj_expand = u_proj.view(N, K, 1, 1).expand(N, K, H, W)
    h = F.tanh(v_proj + u_proj_expand)
    p = F.softmax(self.Wp(h).view(N, H * W)).view(N, 1, H, W)
    self.attention_maps = p.data.clone()
    # reduce over the spatial dims; summing dim 3 before dim 2 stays valid whether or not
    # the reduction keeps the reduced dimension (the original summed (2) then (3), which
    # relied on older keep-dim-by-default behaviour)
    v_tilde = (p.expand_as(v) * v).sum(3).sum(2).view(N, D)
    next_u = u + v_tilde
    return next_u
def _combine_last(self, r, h_t):
    '''
    inputs:
        r   : batch x n_dim
        h_t : batch x n_dim (this is the output from the gru unit)
    params:
        W_x : n_dim x n_dim
        W_p : n_dim x n_dim
    out:
        h_star : batch x n_dim
    '''
    W_p_r = torch.mm(r, self.W_p)    # batch x n_dim
    W_x_h = torch.mm(h_t, self.W_x)  # batch x n_dim
    h_star = F.tanh(W_p_r + W_x_h)   # batch x n_dim
    return h_star
def KrauseLSTMCell(input, hidden, w_ih, w_hh, b_ih=None, b_hh=None):
    # Terminology matchup:
    # - This implementation uses the trick of having all gates concatenated
    #   together into a single tensor, so you can do one matrix multiply to
    #   compute all the gates.
    # - Thus, w_ih holds W_hx, W_ix, W_ox, W_fx
    #   and w_hh holds W_hh, W_ih, W_oh, W_fh
    # - Notice that the indices are swapped, because F.linear has swapped
    #   arguments. "Cancelling" indices are always next to each other.
    hx, cx = hidden
    gates = F.linear(input, w_ih, b_ih) + F.linear(hx, w_hh, b_hh)
    ingate, forgetgate, hiddengate, outgate = gates.chunk(4, 1)
    ingate = F.sigmoid(ingate)
    outgate = F.sigmoid(outgate)
    forgetgate = F.sigmoid(forgetgate)
    cy = (forgetgate * cx) + (ingate * hiddengate)
    hy = F.tanh(cy * outgate)
    return hy, cy
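Because the four gate weight matrices are stacked along the output dimension, the expected tensor shapes are easy to get wrong. A hedged shape check (the sizes are illustrative assumptions, not from the original code):

import torch
import torch.nn.functional as F  # the cell above relies on F.linear / F.sigmoid / F.tanh

batch, input_dim, hidden_dim = 4, 16, 32          # assumed sizes
x = torch.randn(batch, input_dim)
hx = torch.randn(batch, hidden_dim)
cx = torch.randn(batch, hidden_dim)
w_ih = torch.randn(4 * hidden_dim, input_dim)     # four gates stacked along dim 0
w_hh = torch.randn(4 * hidden_dim, hidden_dim)
hy, cy = KrauseLSTMCell(x, (hx, cx), w_ih, w_hh)
print(hy.shape, cy.shape)                         # torch.Size([4, 32]) twice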
def MultiplicativeLSTMCell(input, hidden, w_xm, w_hm, w_ih, w_mh, b_xm=None, b_hm=None, b_ih=None, b_mh=None):
    # w_ih holds W_hx, W_ix, W_ox, W_fx
    # w_mh holds W_hm, W_im, W_om, W_fm
    hx, cx = hidden
    # Key difference: the previous hidden state is first modulated multiplicatively by the input
    m = F.linear(input, w_xm, b_xm) * F.linear(hx, w_hm, b_hm)
    gates = F.linear(input, w_ih, b_ih) + F.linear(m, w_mh, b_mh)
    ingate, forgetgate, hiddengate, outgate = gates.chunk(4, 1)
    ingate = F.sigmoid(ingate)
    outgate = F.sigmoid(outgate)
    forgetgate = F.sigmoid(forgetgate)
    cy = (forgetgate * cx) + (ingate * hiddengate)
    hy = F.tanh(cy * outgate)
    return hy, cy
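The multiplicative variant additionally needs the m-projection weights. Reusing the assumed sizes and tensors (x, hx, cx, input_dim, hidden_dim) from the shape check above, a hedged continuation:

m_dim = 64                                        # assumed intermediate size
w_xm = torch.randn(m_dim, input_dim)              # F.linear weights are (out_features, in_features)
w_hm = torch.randn(m_dim, hidden_dim)
w_ih_m = torch.randn(4 * hidden_dim, input_dim)
w_mh = torch.randn(4 * hidden_dim, m_dim)
hy, cy = MultiplicativeLSTMCell(x, (hx, cx), w_xm, w_hm, w_ih_m, w_mh)
print(hy.shape, cy.shape)                         # torch.Size([4, 32]) twice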
def forward(self, x):
    upblock = True
    # Downsizing layer - Large Kernel ensures large receptive field on the residual blocks
    h = F.relu(self.b2(self.c1(x)))
    # Residual Layers
    for r in self.rs:
        h = r(h)  # will go through all residual blocks in this loop
    if upblock:
        # Upsampling Layers - improvement suggested by [2] to remove "checkerboard pattern"
        for u in self.up:
            h = u(h)  # will go through all upsampling blocks in this loop
    else:
        # As recommended by [1]
        h = F.relu(self.bc2(self.dc2(h)))
        h = F.relu(self.bc3(self.dc3(h)))
    # Last layer and scaled tanh activation - Scaled from 0 to 1 instead of 0 - 255
    h = F.tanh(self.c3(h))
    h = torch.add(h, 1.)
    h = torch.mul(h, 0.5)
    return h
def forward(self, input, context):
    """Propagate input through the network.

    input: batch x dim
    context: batch x sourceL x dim
    """
    target = self.linear_in(input).unsqueeze(2)  # batch x dim x 1
    # Get attention
    attn = torch.bmm(context, target).squeeze(2)  # batch x sourceL
    attn = self.sm(attn)
    attn3 = attn.view(attn.size(0), 1, attn.size(1))  # batch x 1 x sourceL
    weighted_context = torch.bmm(attn3, context).squeeze(1)  # batch x dim
    h_tilde = torch.cat((weighted_context, input), 1)
    h_tilde = self.tanh(self.linear_out(h_tilde))
    return h_tilde, attn
def decoder(self, z, sc_feat32, sc_feat16, sc_feat8, sc_feat4):
    x = z.view(-1, self.hidden_size, 1, 1)
    x = self.dec_upsamp1(x)
    x = torch.cat([x, sc_feat4], 1)
    x = F.relu(self.dec_conv1(x))
    x = self.dec_bn1(x)
    x = self.dec_upsamp2(x)
    x = torch.cat([x, sc_feat8], 1)
    x = F.relu(self.dec_conv2(x))
    x = self.dec_bn2(x)
    x = self.dec_upsamp3(x)
    x = torch.cat([x, sc_feat16], 1)
    x = F.relu(self.dec_conv3(x))
    x = self.dec_bn3(x)
    x = self.dec_upsamp4(x)
    x = torch.cat([x, sc_feat32], 1)
    x = F.relu(self.dec_conv4(x))
    x = self.dec_bn4(x)
    x = self.dec_upsamp5(x)
    x = F.tanh(self.dec_conv5(x))
    return x

# define forward pass
def calc_score(self, att_query, att_keys):
    """
    att_query is: b x t_q x n
    att_keys is:  b x t_k x n
    return b x t_q x t_k scores
    """
    b, t_k, n = list(att_keys.size())
    t_q = att_query.size(1)
    if self.mode == 'bahdanau':
        att_query = att_query.unsqueeze(2).expand(b, t_q, t_k, n)
        att_keys = att_keys.unsqueeze(1).expand(b, t_q, t_k, n)
        sum_qk = att_query + att_keys
        sum_qk = sum_qk.view(b * t_k * t_q, n)
        out = self.linear_att(F.tanh(sum_qk)).view(b, t_q, t_k)
    elif self.mode == 'dot_prod':
        out = torch.bmm(att_query, att_keys.transpose(1, 2))
        if self.normalize:
            out.div_(n ** 0.5)
    return out
def forward(self, prior):
    prior = prior.cuda()
    fc_layer = leaky_relu(self.linear1(prior).view(-1, 512, 4, 4), negative_slope=0.2)
    deconv_layer1 = self.bn1(leaky_relu(self.deconv1(fc_layer), negative_slope=0.2))
    deconv_layer2 = self.bn2(leaky_relu(self.deconv2(deconv_layer1), negative_slope=0.2))
    deconv_layer3 = tanh(self.deconv3(deconv_layer2))
    return deconv_layer3

# Inference without batch normalization does not improve image quality
# def infer(self, prior):
#     prior = prior.cuda()
#     fc_layer = leaky_relu(self.linear1(prior).view(-1, 512, 4, 4), negative_slope=0.2)
#     deconv_layer1 = leaky_relu(self.deconv1(fc_layer), negative_slope=0.2)
#     deconv_layer2 = leaky_relu(self.deconv2(deconv_layer1), negative_slope=0.2)
#     deconv_layer3 = tanh(self.deconv3(deconv_layer2))
#     return deconv_layer3
def forward(self, inp):
    # if inp.dim() > 2:
    #     inp = inp.permute(0, 2, 1)
    # inp = inp.contiguous().view(-1, self.L)
    if not (type(inp) == Variable):
        inp = Variable(inp[0])
    if hasattr(self.arguments, 'pack_num'):
        N = inp.size(0)
        Ncut = int(N / self.arguments.pack_num)
        split = torch.split(inp, Ncut, dim=0)
        inp = torch.cat(split, dim=1)
    h1 = F.tanh(self.l1(inp))
    # h2 = F.tanh(self.l2_bn(self.l2(h1)))
    if self.arguments.tr_method == 'adversarial_wasserstein':
        output = self.l3(h1)
    else:
        output = F.sigmoid(self.l3(h1))
    return output, h1