def validate(models, dataset, arg, cuda=False):
criterion = nn.MSELoss()
losses = []
batcher = dataset.get_batcher(shuffle=True, augment=False)
for b, (x, y) in enumerate(batcher, 1):
        x = V(th.from_numpy(x).float())
        y = V(th.from_numpy(y).float())
        if cuda:
            x, y = x.cuda(), y.cuda()
# Ensemble average
logit = None
for model, _ in models:
model.eval()
logit = model(x) if logit is None else logit + model(x)
logit = th.div(logit, len(models))
loss = criterion(logit, y)
losses.append(loss.data[0])
return np.mean(losses)
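# A minimal, standalone sketch of the ensemble-averaging pattern used above:
# sum the raw outputs of several models and torch.div() by the model count.
# The toy models, shapes, and the recent-PyTorch API assumed here are
# illustrative only, not part of the original snippet.
import torch

def ensemble_average(models, x):
    total = None
    for model in models:
        model.eval()
        out = model(x)
        total = out if total is None else total + out
    return torch.div(total, len(models))

# Usage (hypothetical):
#   models = [torch.nn.Linear(8, 1) for _ in range(3)]
#   ensemble_average(models, torch.randn(4, 8))  # -> tensor of shape (4, 1)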
def predict(models, dataset, arg, cuda=False):
prediction_file = open('save/predictions.txt', 'w')
batcher = dataset.get_batcher(shuffle=False, augment=False)
for b, (x, _) in enumerate(batcher, 1):
        x = V(th.from_numpy(x).float())
        if cuda:
            x = x.cuda()
# Ensemble average
logit = None
for model, _ in models:
model.eval()
logit = model(x) if logit is None else logit + model(x)
logit = th.div(logit, len(models))
prediction = logit.cpu().data[0][0]
prediction_file.write('%s\n' % prediction)
if arg.verbose and b % 100 == 0:
print('[predict] [b]:%s - prediction: %s' % (b, prediction))
    prediction_file.close()
def bn_hat_z_layers(self, hat_z_layers, z_pre_layers):
# TODO: Calculate batchnorm using GPU Tensors.
assert len(hat_z_layers) == len(z_pre_layers)
hat_z_layers_normalized = []
for i, (hat_z, z_pre) in enumerate(zip(hat_z_layers, z_pre_layers)):
if self.use_cuda:
ones = Variable(torch.ones(z_pre.size()[0], 1).cuda())
else:
ones = Variable(torch.ones(z_pre.size()[0], 1))
mean = torch.mean(z_pre, 0)
noise_var = np.random.normal(loc=0.0, scale=1 - 1e-10, size=z_pre.size())
if self.use_cuda:
var = np.var(z_pre.data.cpu().numpy() + noise_var, axis=0).reshape(1, z_pre.size()[1])
else:
var = np.var(z_pre.data.numpy() + noise_var, axis=0).reshape(1, z_pre.size()[1])
var = Variable(torch.FloatTensor(var))
if self.use_cuda:
hat_z = hat_z.cpu()
ones = ones.cpu()
mean = mean.cpu()
hat_z_normalized = torch.div(hat_z - ones.mm(mean), ones.mm(torch.sqrt(var + 1e-10)))
if self.use_cuda:
hat_z_normalized = hat_z_normalized.cuda()
hat_z_layers_normalized.append(hat_z_normalized)
return hat_z_layers_normalized
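# A minimal sketch of the same per-column normalization done entirely with
# torch ops (so it also runs on GPU tensors, as the TODO above suggests).
# The noise term and the per-layer bookkeeping of bn_hat_z_layers are omitted;
# this is an illustrative assumption, not the original implementation.
import torch

def batchnorm_with_div(z, eps=1e-10):
    mean = z.mean(dim=0, keepdim=True)
    var = z.var(dim=0, unbiased=False, keepdim=True)
    return torch.div(z - mean, torch.sqrt(var + eps))

# Usage (hypothetical):
#   z = torch.randn(32, 16)
#   batchnorm_with_div(z).mean(0)  # approximately zero per column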
def train(epoch):
for e_ in range(epoch):
if (e_ + 1) % 10 == 0:
adjust_learning_rate(optimizer, e_)
cnt = 0
loss = Variable(torch.Tensor([0]))
for i_q, i_k, i_v, i_cand, i_a in zip(train_q, train_key,train_value, train_cand, train_a):
cnt += 1
i_q = i_q.unsqueeze(0) # add dimension
probs = model.forward(i_q, i_k, i_v,i_cand)
i_a = Variable(i_a)
curr_loss = loss_function(probs, i_a)
loss = torch.add(loss, torch.div(curr_loss, config.batch_size))
            # naive batch implementation: each example's loss is divided by the batch size
if cnt % config.batch_size == 0:
print "Training loss", loss.data.sum()
loss.backward()
optimizer.step()
loss = Variable(torch.Tensor([0]))
model.zero_grad()
if cnt % config.valid_every == 0:
print "Accuracy:",eval()
def train(epoch):
for e_ in range(epoch):
if (e_ + 1) % 10 == 0:
adjust_learning_rate(optimizer, e_)
cnt = 0
loss = Variable(torch.Tensor([0]))
for i_q, i_w, i_e_p, i_a in zip(train_q, train_w, train_e_p, train_a):
cnt += 1
i_q = i_q.unsqueeze(0) # add dimension
probs = model.forward(i_q, i_w, i_e_p)
i_a = Variable(i_a)
curr_loss = loss_function(probs, i_a)
loss = torch.add(loss, torch.div(curr_loss, config.batch_size))
            # naive batch implementation: each example's loss is divided by the batch size
if cnt % config.batch_size == 0:
print "Training loss", loss.data.sum()
loss.backward()
optimizer.step()
loss = Variable(torch.Tensor([0]))
model.zero_grad()
if cnt % config.valid_every == 0:
print "Accuracy:",eval()
def updateOutput(self, input):
assert input.dim() == 2
input_size = input.size()
    if self._output is None:
        self._output = input.new()
    if self.norm is None:
        self.norm = input.new()
    if self.buffer is None:
        self.buffer = input.new()
self._output.resize_as_(input)
# specialization for the infinity norm
if self.p == float('inf'):
if not self._indices:
self._indices = torch.cuda.FloatTensor() if torch.typename(self.output) == 'torch.cuda.FloatTensor' \
else torch.LongTensor()
torch.abs(self.buffer, input)
torch.max(self.norm, self._indices, self.buffer, 1)
self.norm.add_(self.eps)
else:
        if self.normp is None:
            self.normp = input.new()
if self.p % 2 != 0:
torch.abs(self.buffer, input).pow_(self.p)
else:
torch.pow(self.buffer, input, self.p)
torch.sum(self.normp, self.buffer, 1).add_(self.eps)
torch.pow(self.norm, self.normp, 1./self.p)
torch.div(self._output, input, self.norm.view(-1, 1).expand_as(input))
self.output = self._output.view(input_size)
return self.output
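# A standalone sketch of the p-norm row normalization that updateOutput
# implements above, written against the modern tensor API and without the
# legacy-nn buffers (an illustrative rewrite, not the original module).
import torch

def normalize_rows(x, p=2.0, eps=1e-7):
    if p == float('inf'):
        norm = x.abs().max(dim=1, keepdim=True)[0] + eps
    else:
        norm = x.abs().pow(p).sum(dim=1, keepdim=True).pow(1.0 / p) + eps
    return torch.div(x, norm.expand_as(x))

# Usage (hypothetical):
#   normalize_rows(torch.randn(3, 5)).norm(p=2, dim=1)  # roughly 1 per row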
def updateGradInput(self, input, gradOutput):
if not self.gradInput:
return
    if self._div is None:
        self._div = input.new()
    if self._output is None:
        self._output = self.output.new()
    if self._gradOutput is None:
        self._gradOutput = input.new()
    if self._expand3 is None:
        self._expand3 = input.new()
if not self.fastBackward:
self.updateOutput(input)
inputSize, outputSize = self.weight.size(0), self.weight.size(1)
"""
dy_j -2 * (w_j - x) x - w_j
---- = ---------------- = -------
dx 2 || w_j - x || y_j
"""
# to prevent div by zero (NaN) bugs
self._output.resize_as_(self.output).copy_(self.output).add_(0.0000001)
self._view(self._gradOutput, gradOutput, gradOutput.size())
torch.div(self._div, gradOutput, self._output)
assert input.dim() == 2
batchSize = input.size(0)
self._div.resize_(batchSize, 1, outputSize)
self._expand3 = self._div.expand(batchSize, inputSize, outputSize)
if torch.typename(input) == 'torch.cuda.FloatTensor':
self._repeat2.resize_as_(self._expand3).copy_(self._expand3)
self._repeat2.mul_(self._repeat)
else:
torch.mul(self._repeat2, self._repeat, self._expand3)
torch.sum(self.gradInput, self._repeat2, 2)
self.gradInput.resize_as_(input)
return self.gradInput
def forward(self, input1, input2, y):
self.w1 = input1.new()
self.w22 = input1.new()
self.w = input1.new()
self.w32 = input1.new()
self._outputs = input1.new()
_idx = input1.new().byte()
buffer = torch.mul(input1, input2)
torch.sum(buffer, 1, out=self.w1)
epsilon = 1e-12
torch.mul(input1, input1, out=buffer)
torch.sum(buffer, 1, out=self.w22).add_(epsilon)
self._outputs.resize_as_(self.w22).fill_(1)
torch.div(self._outputs, self.w22, out=self.w22)
self.w.resize_as_(self.w22).copy_(self.w22)
torch.mul(input2, input2, out=buffer)
torch.sum(buffer, 1, out=self.w32).add_(epsilon)
torch.div(self._outputs, self.w32, out=self.w32)
self.w.mul_(self.w32)
self.w.sqrt_()
torch.mul(self.w1, self.w, out=self._outputs)
self._outputs = self._outputs.select(1, 0)
torch.eq(y, -1, out=_idx)
self._outputs[_idx] = self._outputs[_idx].add_(-self.margin).clamp_(min=0)
torch.eq(y, 1, out=_idx)
self._outputs[_idx] = self._outputs[_idx].mul_(-1).add_(1)
output = self._outputs.sum()
if self.size_average:
output = output / y.size(0)
self.save_for_backward(input1, input2, y)
return input1.new((output,))
def updateOutput(self, input):
assert input.dim() == 2
input_size = input.size()
if self._output is None:
self._output = input.new()
if self.norm is None:
self.norm = input.new()
if self.buffer is None:
self.buffer = input.new()
self._output.resize_as_(input)
# specialization for the infinity norm
if self.p == float('inf'):
if not self._indices:
self._indices = torch.cuda.FloatTensor() if torch.typename(self.output) == 'torch.cuda.FloatTensor' \
else torch.LongTensor()
torch.abs(input, out=self.buffer)
torch.max(self._indices, self.buffer, 1, out=self.norm)
self.norm.add_(self.eps)
else:
if self.normp is None:
self.normp = input.new()
if self.p % 2 != 0:
torch.abs(input, out=self.buffer).pow_(self.p)
else:
torch.pow(input, self.p, out=self.buffer)
torch.sum(self.buffer, 1, out=self.normp).add_(self.eps)
torch.pow(self.normp, 1. / self.p, out=self.norm)
torch.div(input, self.norm.view(-1, 1).expand_as(input), out=self._output)
self.output = self._output.view(input_size)
return self.output
def forward(self, input1, input2, y):
self.w1 = input1.new()
self.w22 = input1.new()
self.w = input1.new()
self.w32 = input1.new()
self._outputs = input1.new()
_idx = input1.new().byte()
buffer = torch.mul(input1, input2)
torch.sum(buffer, 1, out=self.w1, keepdim=True)
epsilon = 1e-12
torch.mul(input1, input1, out=buffer)
torch.sum(buffer, 1, out=self.w22, keepdim=True).add_(epsilon)
self._outputs.resize_as_(self.w22).fill_(1)
torch.div(self._outputs, self.w22, out=self.w22)
self.w.resize_as_(self.w22).copy_(self.w22)
torch.mul(input2, input2, out=buffer)
torch.sum(buffer, 1, out=self.w32, keepdim=True).add_(epsilon)
torch.div(self._outputs, self.w32, out=self.w32)
self.w.mul_(self.w32)
self.w.sqrt_()
torch.mul(self.w1, self.w, out=self._outputs)
self._outputs = self._outputs.select(1, 0)
torch.eq(y, -1, out=_idx)
self._outputs[_idx] = self._outputs[_idx].add_(-self.margin).clamp_(min=0)
torch.eq(y, 1, out=_idx)
self._outputs[_idx] = self._outputs[_idx].mul_(-1).add_(1)
output = self._outputs.sum()
if self.size_average:
output = output / y.size(0)
self.save_for_backward(input1, input2, y)
return input1.new((output,))
def updateOutput(self, input):
assert input.dim() == 2
input_size = input.size()
if self._output is None:
self._output = input.new()
if self.norm is None:
self.norm = input.new()
if self.buffer is None:
self.buffer = input.new()
self._output.resize_as_(input)
# specialization for the infinity norm
if self.p == float('inf'):
if not self._indices:
self._indices = torch.cuda.FloatTensor() if torch.typename(self.output) == 'torch.cuda.FloatTensor' \
else torch.LongTensor()
torch.abs(input, out=self.buffer)
torch.max(self._indices, self.buffer, 1, out=self.norm, keepdim=True)
self.norm.add_(self.eps)
else:
if self.normp is None:
self.normp = input.new()
if self.p % 2 != 0:
torch.abs(input, out=self.buffer).pow_(self.p)
else:
torch.pow(input, self.p, out=self.buffer)
torch.sum(self.buffer, 1, out=self.normp, keepdim=True).add_(self.eps)
torch.pow(self.normp, 1. / self.p, out=self.norm)
torch.div(input, self.norm.view(-1, 1).expand_as(input), out=self._output)
self.output = self._output.view(input_size)
return self.output
def forward(self, dec_state, context, mask=None):
"""
:param dec_state: batch x dec_dim
:param context: batch x T x enc_dim
:return: Weighted context, batch x enc_dim
Alpha weights (viz), batch x T
"""
batch, source_l, enc_dim = context.size()
assert enc_dim == self.enc_dim
# W*s over the entire batch (batch, attn_dim)
dec_contrib = self.decoder_in(dec_state)
# W*h over the entire length & batch (batch, source_l, attn_dim)
enc_contribs = self.encoder_in(
context.view(-1, self.enc_dim)).view(batch, source_l, self.attn_dim)
# tanh( Wh*hj + Ws s_{i-1} ) (batch, source_l, dim)
pre_attn = F.tanh(enc_contribs + dec_contrib.unsqueeze(1).expand_as(enc_contribs))
# v^T*pre_attn for all batches/lengths (batch, source_l)
energy = self.att_linear(pre_attn.view(-1, self.attn_dim)).view(batch, source_l)
# Apply the mask. (Might be a better way to do this)
if mask is not None:
shift = energy.max(1)[0]
energy_exp = (energy - shift.expand_as(energy)).exp() * mask
alpha = torch.div(energy_exp, energy_exp.sum(1).expand_as(energy_exp))
else:
alpha = F.softmax(energy)
weighted_context = torch.bmm(alpha.unsqueeze(1), context).squeeze(1) # (batch, dim)
return weighted_context, alpha
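# A minimal sketch of the masked-softmax step in the attention forward above:
# shift for numerical stability, exponentiate, zero out padded positions, then
# torch.div by the per-row sums. Shapes and the eps guard are illustrative
# assumptions, not part of the original module.
import torch

def masked_softmax(energy, mask, eps=1e-12):
    # energy, mask: (batch, source_l); mask is 1.0 for real tokens, 0.0 for padding
    shift = energy.max(dim=1, keepdim=True)[0]
    energy_exp = (energy - shift).exp() * mask
    return torch.div(energy_exp, energy_exp.sum(dim=1, keepdim=True) + eps)

# Usage (hypothetical):
#   e = torch.randn(2, 5)
#   m = torch.tensor([[1., 1., 1., 0., 0.], [1., 1., 1., 1., 1.]])
#   masked_softmax(e, m).sum(1)  # each row sums to 1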
def rotation_error(input, target):
x1 = torch.norm(input, dim=1)
x2 = torch.norm(target, dim=1)
x1 = torch.div(input, torch.stack((x1, x1, x1, x1), dim=1))
x2 = torch.div(target, torch.stack((x2, x2, x2, x2), dim=1))
d = torch.abs(torch.sum(x1 * x2, dim=1))
theta = 2 * torch.acos(d) * 180/math.pi
theta = torch.mean(theta)
return theta
def rotation_error(input, target):
"""Gets cosine distance between input and target """
x1 = torch.norm(input, dim=1)
x2 = torch.norm(target, dim=1)
x1 = torch.div(input, torch.stack((x1, x1, x1, x1), dim=1))
x2 = torch.div(target, torch.stack((x2, x2, x2, x2), dim=1))
d = torch.abs(torch.sum(x1 * x2, dim=1))
theta = 2 * torch.acos(d) * 180/math.pi
theta = torch.mean(theta)
return theta
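# Usage sketch for rotation_error above (hypothetical values; rows are assumed
# to be quaternions, as in the original call sites):
#   q_pred = torch.randn(8, 4)
#   q_true = torch.randn(8, 4)
#   rotation_error(q_pred, q_true)  # mean angular difference in degrees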
def forward(self, inpt):
batch_size = self.batch_size
f0 = self.features(inpt[:, 0])
f0 = f0.view(batch_size, -1)
f1 = self.features(inpt[:, 1])
f1 = f1.view(batch_size, -1)
# f2 = self.features(inpt[:, 2])
# f2 = f2.view(batch_size, -1)
#
# f3 = self.features(inpt[:, 3])
# f3 = f3.view(batch_size, -1)
#
# f4 = self.features(inpt[:, 4])
# f4 = f4.view(batch_size, -1)
#
# f = torch.stack((f0, f1, f2, f3, f4), dim=0).view(self.seq_length, batch_size, -1)
f = torch.cat((f0, f1), dim=1)
# _, hn = self.rnn(f, self.hidden)
# hn = hn[self.gru_layer - 1].view(batch_size, -1)
# hn = self.relu(hn)
# hn = self.dropout(hn)
# hn = self.regressor(hn)
hn = self.regressor(f)
trans = self.trans_regressor(hn)
# trans_norm = torch.norm(trans, dim=1)
# trans = torch.div(trans, torch.cat((trans_norm, trans_norm, trans_norm), dim=1))
scale = self.scale_regressor(hn)
rotation = self.rotation_regressor(hn)
return trans, scale, rotation
def l2_norm(self,input):
input_size = input.size()
buffer = torch.pow(input, 2)
normp = torch.sum(buffer, 1).add_(1e-10)
norm = torch.sqrt(normp)
_output = torch.div(input, norm.view(-1, 1).expand_as(input))
output = _output.view(input_size)
return output
def normalize_batch(batch):
# normalize using imagenet mean and std
mean = batch.data.new(batch.data.size())
std = batch.data.new(batch.data.size())
mean[:, 0, :, :] = 0.485
mean[:, 1, :, :] = 0.456
mean[:, 2, :, :] = 0.406
std[:, 0, :, :] = 0.229
std[:, 1, :, :] = 0.224
std[:, 2, :, :] = 0.225
batch = torch.div(batch, 255.0)
batch -= Variable(mean)
batch = batch / Variable(std)
return batch
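# Usage sketch for normalize_batch above (assumes RGB images in [0, 255],
# NCHW layout, and the old Variable API used by the function):
#   from torch.autograd import Variable
#   batch = Variable(torch.rand(4, 3, 224, 224) * 255)
#   normalized = normalize_batch(batch)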
def batch_norm_scattering(x, m, v):
    m = m.expand_as(x)
    v = v.expand_as(x)
    x = torch.div(torch.add(x, -m), v)
    return x
def forward(self, input):
x = input
if x.data.is_cuda and self.gpuDevice != 0:
x = x.cuda(self.gpuDevice)
#
if x.size()[-1] == 128:
x = self.resize2(self.resize1(x))
x = self.layer8(self.layer7(self.layer6(self.layer5(
self.layer4(self.layer3(self.layer2(self.layer1(x))))))))
x = self.layer13(self.layer12(
self.layer11(self.layer10(self.layer9(x)))))
x = self.layer14(x)
x = self.layer15(x)
x = self.layer16(x)
x = self.layer17(x)
x = self.layer18(x)
x = self.layer19(x)
x = self.layer21(x)
x = self.layer22(x)
x = x.view((-1, 736))
x_736 = x
x = self.layer25(x)
x_norm = torch.sqrt(torch.sum(x**2, 1) + 1e-6)
x = torch.div(x, x_norm.view(-1, 1).expand_as(x))
return (x, x_736)
def columnwise_cosine_similarity(matrix1, matrix2):
"""Return the columnwise cosine similarity from matrix1 and matrix2.
Expect tesor of dimension (batch_size, seq_len, hidden).
Return tensor of size (batch_size, seq_len) containing the cosine
similarities."""
assert matrix1.size() == matrix2.size(), 'matrix sizes do not match'
# -> (batch_size, seq_len, 1)
n_m1 = torch.norm(matrix1, 2, 2)
n_m2 = torch.norm(matrix2, 2, 2)
# -> (batch_size, seq_len, 1)
col_norm = torch.mul(n_m1, n_m2)
# -> (batch_size, seq_len, hidden)
colprod = torch.mul(matrix1, matrix2)
# -> (batch_size, seq_len, 1)
colsum = torch.sum(colprod, 2)
# -> (batch_size, seq_len, 1)
cosine_sim = torch.div(colsum, col_norm)
# -> (batch_size, seq_len)
cosine_sim = cosine_sim.squeeze()
return cosine_sim
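# Usage sketch (hypothetical shapes): compare two (batch_size, seq_len, hidden)
# tensors position by position.
#   a = torch.randn(2, 7, 16)
#   b = torch.randn(2, 7, 16)
#   columnwise_cosine_similarity(a, b)  # -> (2, 7) after the squeeze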
def full_cosine_similarity(matrix1, matrix2):
"""
Expect 2 matrices P and Q of dimension (d, n1) and (d, n2) respectively.
Return a matrix A of dimension (n1, n2) with the result of comparing each
vector to one another. A[i, j] represents the cosine similarity between
vectors P[:, i] and Q[:, j].
"""
n1 = matrix1.size(1)
n2 = matrix2.size(1)
d = matrix1.size(0)
assert d == matrix2.size(0)
# -> (d, n1, 1)
t1 = matrix1.view(d, n1, 1)
# -> (d, n1, n2)
t1 = t1.repeat(1, 1, n2)
# -> (d, 1, n2)
t2 = matrix2.view(d, 1, n2)
# -> (d, n1, n2)
t2 = t2.repeat(1, n1, 1).contiguous()
t1_x_t2 = torch.mul(t1, t2) # (d, n1, n2)
dotprod = torch.sum(t1_x_t2, 0).squeeze() # (n1, n2)
norm1 = torch.norm(t1, 2, 0) # (n1, n2)
norm2 = torch.norm(t2, 2, 0) # (n1, n2)
col_norm = torch.mul(norm1, norm2).squeeze() # (n1, n2)
return torch.div(dotprod, col_norm) # (n1, n2)
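# Usage sketch (hypothetical shapes): compare every column of P with every
# column of Q.
#   P = torch.randn(16, 5)
#   Q = torch.randn(16, 3)
#   full_cosine_similarity(P, Q)  # -> (5, 3)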
def batch_full_cosine_similarity(tensor1, tensor2):
"""
Expect 2 tensors tensor1 and tensor2 of dimension
(batch_size, seq_len_p, hidden) and (batch_size, seq_len_q, hidden)
respectively.
Return a matrix A of dimension (batch_size, seq_len_p, seq_len_q) with the
result of comparing each matrix to one another. A[k, :, :] represents the
cosine similarity between matrices P[k, :, :] and Q[k, :, :]. Then
A_k[i, j] is a scalar representing the cosine similarity between vectors
P_k[i, :] and Q_k[j, :]
"""
batch_size = tensor1.size(0)
seq_len_p = tensor1.size(1)
seq_len_q = tensor2.size(1)
hidden = tensor1.size(2)
assert batch_size == tensor2.size(0)
assert hidden == tensor2.size(2)
# -> (batch_size, seq_len_p, 1, hidden)
t1 = tensor1.unsqueeze(2)
# -> (batch_size, seq_len_p, seq_len_q, hidden)
t1 = t1.repeat(1, 1, seq_len_q, 1)
# -> (batch_size, 1, seq_len_q, hidden)
t2 = tensor2.unsqueeze(1)
# -> (batch_size, seq_len_p, seq_len_q, hidden)
t2 = t2.repeat(1, seq_len_p, 1, 1)
# -> (batch_size, seq_len_p, seq_len_q, hidden)
t1_x_t2 = torch.mul(t1, t2)
# -> (batch_size, seq_len_p, seq_len_q)
dotprod = torch.sum(t1_x_t2, 3).squeeze(3)
# norm1, norm2 and col_norm have dim (batch_size, seq_len_p, seq_len_q)
norm1 = torch.norm(t1, 2, 3)
norm2 = torch.norm(t2, 2, 3)
col_norm = torch.mul(norm1, norm2).squeeze(3)
return torch.div(dotprod, col_norm) # (batch_size, seq_len_p, seq_len_q)
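# Usage sketch (hypothetical shapes; assumes the older PyTorch semantics this
# code was written for, where sum/norm keep the reduced dimension):
#   P = torch.randn(2, 4, 8)
#   Q = torch.randn(2, 6, 8)
#   batch_full_cosine_similarity(P, Q)  # -> (2, 4, 6)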
def l2norm(X):
"""L2-normalize columns of X
"""
norm = torch.pow(X, 2).sum(dim=1, keepdim=True).sqrt()
X = torch.div(X, norm)
return X
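# Usage sketch: l2norm rescales each row of X to unit L2 norm.
#   X = torch.randn(4, 10)
#   l2norm(X).pow(2).sum(1)  # roughly 1 for every row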
def forward(self, input_n, hidden, phi, nh):
hidden = torch.cat((hidden, input_n), 2)
    # Aggregate representations
h_conv = torch.div(torch.bmm(phi, hidden), nh)
hidden = hidden.view(-1, self.hidden_size + self.input_size)
h_conv = h_conv.view(-1, self.hidden_size + self.input_size)
# h_conv has shape (batch_size, n, hidden_size + input_size)
m1 = (torch.mm(hidden, self.W1)
.view(self.batch_size, -1, self.hidden_size))
m2 = (torch.mm(h_conv, self.W2)
.view(self.batch_size, -1, self.hidden_size))
m3 = self.b.unsqueeze(0).unsqueeze(1).expand_as(m2)
hidden = torch.sigmoid(m1 + m2 + m3)
return hidden
def forward(self, input_n, hidden, phi, nh):
hidden = torch.cat((hidden, input_n), 2)
    # Aggregate representations
h_conv = torch.div(torch.bmm(phi, hidden), nh)
hidden = hidden.view(-1, self.hidden_size + self.input_size + 2)
h_conv = h_conv.view(-1, self.hidden_size + self.input_size + 2)
# h_conv has shape (batch_size, n, hidden_size + input_size)
m1 = (torch.mm(hidden, self.W1)
.view(self.batch_size, -1, self.hidden_size))
m2 = (torch.mm(h_conv, self.W2)
.view(self.batch_size, -1, self.hidden_size))
m3 = self.b.unsqueeze(0).unsqueeze(1).expand_as(m2)
hidden = torch.sigmoid(m1 + m2 + m3)
return hidden
def forward(self, input_n, hidden, phi, nh):
self.batch_size = input_n.size()[0]
hidden = torch.cat((hidden, input_n), 2)
    # Aggregate representations
h_conv = torch.div(torch.bmm(phi, hidden), nh)
hidden = hidden.view(-1, self.hidden_size + self.input_size)
h_conv = h_conv.view(-1, self.hidden_size + self.input_size)
# h_conv has shape (batch_size, n, hidden_size + input_size)
m1 = (torch.mm(hidden, self.W1)
.view(self.batch_size, -1, self.hidden_size))
m2 = (torch.mm(h_conv, self.W2)
.view(self.batch_size, -1, self.hidden_size))
m3 = self.b.unsqueeze(0).unsqueeze(1).expand_as(m2)
hidden = torch.sigmoid(m1 + m2 + m3)
return hidden
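# A minimal sketch of the aggregation step used in the graph forward passes
# above: a batched adjacency multiply followed by torch.div by the neighbour
# counts. The shapes below are illustrative assumptions, not the original model.
import torch

def aggregate_neighbours(phi, hidden, nh):
    # phi: (batch, n, n) adjacency, hidden: (batch, n, d), nh: (batch, n, 1) counts
    return torch.div(torch.bmm(phi, hidden), nh)

# Usage (hypothetical):
#   phi = torch.ones(2, 5, 5)
#   hidden = torch.randn(2, 5, 8)
#   nh = torch.full((2, 5, 1), 5.0)
#   aggregate_neighbours(phi, hidden, nh)  # -> (2, 5, 8)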