def forward(self, input, target):
    buffer = input.new()
    buffer.resize_as_(input).copy_(input)
    buffer[torch.eq(target, -1.)] = 0
    output = buffer.sum()

    buffer.fill_(self.margin).add_(-1, input)
    buffer.clamp_(min=0)
    buffer[torch.eq(target, 1.)] = 0
    output += buffer.sum()

    if self.size_average:
        output = output / input.nelement()

    self.save_for_backward(input, target)
    return input.new((output,))
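The snippet above builds the hinge embedding loss by hand: it sums `input` where `target == 1` and `clamp(margin - input, min=0)` where `target == -1`. As a rough cross-check (my addition, not part of the original source), recent PyTorch exposes the same quantity directly:

import torch.nn.functional as F

# sketch: should match the hand-rolled value above for a +1/-1 target tensor
loss = F.hinge_embedding_loss(input, target, margin=self.margin,
                              reduction='mean' if self.size_average else 'sum')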
def forward(self, input1, input2, weight, bias=None):
    self.save_for_backward(input1, input2, weight, bias)
    output = input1.new(input1.size(0), weight.size(0))
    buff = input1.new()

    # compute output scores:
    for k, w in enumerate(weight):
        torch.mm(input1, w, out=buff)
        buff.mul_(input2)
        torch.sum(buff, 1, out=output.narrow(1, k, 1))

    if bias is not None:
        output.add_(bias.expand_as(output))

    return output
def _test_btrisolve(self, cast):
    a = torch.FloatTensor((((1.3722, -0.9020),
                            (1.8849, 1.9169)),
                           ((0.7187, -1.1695),
                            (-0.0139, 1.3572)),
                           ((-1.6181, 0.7148),
                            (1.3728, 0.1319))))
    b = torch.FloatTensor(((4.02, 6.19),
                           (-1.56, 4.00),
                           (9.81, -4.09)))
    a, b = cast(a), cast(b)
    info = cast(torch.IntTensor())
    LU_data, pivots = a.btrifact(info=info)
    self.assertEqual(info.abs().sum(), 0)
    x = torch.btrisolve(b, LU_data, pivots)
    b_ = torch.bmm(a, x.unsqueeze(2)).squeeze()
    self.assertEqual(b_, b)
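`btrifact`/`btrisolve` are the old batched-LU names. In recent PyTorch the same check could be written with `torch.linalg` (a sketch under that assumption, not part of the original test):

LU, piv = torch.linalg.lu_factor(a)                               # batched LU factorization
x = torch.linalg.lu_solve(LU, piv, b.unsqueeze(-1)).squeeze(-1)   # solve a @ x = b per batch
assert torch.allclose(torch.bmm(a, x.unsqueeze(-1)).squeeze(-1), b, atol=1e-4)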
def test_bernoulli(self):
    t = torch.ByteTensor(10, 10)

    def isBinary(t):
        return torch.ne(t, 0).mul_(torch.ne(t, 1)).sum() == 0

    p = 0.5
    t.bernoulli_(p)
    self.assertTrue(isBinary(t))

    p = torch.rand(SIZE)
    t.bernoulli_(p)
    self.assertTrue(isBinary(t))

    q = torch.rand(5, 5)
    self.assertTrue(isBinary(q.bernoulli()))
def cost_matrix(x, y, p=2):
    """Returns the matrix of $|x_i - y_j|^p$."""
    x_col = x.unsqueeze(1)
    y_lin = y.unsqueeze(0)
    c = torch.sum((torch.abs(x_col - y_lin)) ** p, 2)
    return c
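A small usage sketch (the shapes are inferred from the broadcasting above, not stated in the source): with `x` of shape `(n, d)` and `y` of shape `(m, d)`, the result is the `(n, m)` matrix of p-th-power distances.

x = torch.randn(5, 3)
y = torch.randn(7, 3)
c = cost_matrix(x, y, p=2)   # c[i, j] == ((x[i] - y[j]).abs() ** 2).sum()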
def gauss_log_prob(means, logstds, x):
    # log N(x; mean, std) = -(x - mean)^2 / (2 * var) - 0.5 * log(2 * pi) - log(std),
    # summed over the feature dimension
    var = th.exp(2 * logstds)
    gp = -((x - means) ** 2) / (2 * var) - 0.5 * LOG2PI - logstds
    return th.sum(gp, dim=1)
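A quick sanity check for the formula (my assumption: `th` is `torch` and `LOG2PI = math.log(2 * math.pi)`, which this snippet does not define):

import math
import torch as th

LOG2PI = math.log(2 * math.pi)
means, logstds, x = th.zeros(4, 3), th.zeros(4, 3), th.randn(4, 3)
ref = th.distributions.Normal(means, th.exp(logstds)).log_prob(x).sum(dim=1)
# gauss_log_prob(means, logstds, x) should match `ref` up to floating-point error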
def dot_not_flat(A, B):
    """Equivalent of flattening matrices A, B and doing a vector product."""
    return sum([th.sum(a * b) for a, b in zip(A, B)])
Source: train.py from the Structured-Self-Attentive-Sentence-Embedding project (author: ExplorerFreda)
def Frobenius(mat):
    size = mat.size()
    if len(size) == 3:  # batched matrix
        ret = (torch.sum(torch.sum((mat ** 2), 1), 2).squeeze() + 1e-10) ** 0.5
        return torch.sum(ret) / size[0]
    else:
        raise Exception('matrix for computing Frobenius norm should have 3 dims')
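For reference (my substitution, not the original project's code), the same quantity, the batch-averaged Frobenius norm without the small 1e-10 stabilizer, can be written in one line in recent PyTorch:

frob = torch.linalg.matrix_norm(mat).mean()   # mat: (batch, r, c) -> scalar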
def grad_variance(self):
    global_state = self._global_state
    beta = self._beta
    self._grad_var = np.array(0.0, dtype=np.float32)
    for group_id, group in enumerate(self._optimizer.param_groups):
        for p_id, p in enumerate(group['params']):
            if p.grad is None:
                continue
            grad = p.grad.data
            state = self._optimizer.state[p]
            if self._iter == 0:
                state["grad_avg"] = grad.new().resize_as_(grad).zero_()
                state["grad_avg_squared"] = 0.0
            state["grad_avg"].mul_(beta).add_(1 - beta, grad)
            self._grad_var += torch.sum(state["grad_avg"] * state["grad_avg"])

    if self._zero_debias:
        debias_factor = self.zero_debias_factor()
    else:
        debias_factor = 1.0

    # Var[g] ~= E[g^2] - E[g]^2: subtract the squared running mean of the gradient,
    # then add the running average of the squared gradient norm.
    self._grad_var /= -(debias_factor ** 2)
    self._grad_var += global_state['grad_norm_squared_avg'] / debias_factor
    # guard against negative variance: the two terms use different debias factors
    self._grad_var = max(self._grad_var, eps)
    if self._sparsity_debias:
        self._grad_var *= self._sparsity_avg
    return
def attention_mul(rnn_outputs, att_weights):
    attn_vectors = None
    for i in range(rnn_outputs.size(0)):
        h_i = rnn_outputs[i]
        a_i = att_weights[i].unsqueeze(1).expand_as(h_i)
        h_i = a_i * h_i
        h_i = h_i.unsqueeze(0)
        if attn_vectors is None:
            attn_vectors = h_i
        else:
            attn_vectors = torch.cat((attn_vectors, h_i), 0)
    return torch.sum(attn_vectors, 0)
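The loop weights each time step and sums over time. Assuming `rnn_outputs` has shape `(seq_len, batch, hidden)` and `att_weights` has shape `(seq_len, batch)` (shapes inferred, not stated in the source), a broadcasted one-liner computes the same context vector:

context = (att_weights.unsqueeze(2) * rnn_outputs).sum(0)   # (batch, hidden)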
def test_accuracy_mini_batch(tokens, features, labels, word_attn, sent_attn):
    y_pred = get_predictions(tokens, features, word_attn, sent_attn)
    y_pred = torch.gt(y_pred, 0.5)
    correct = np.ndarray.flatten(y_pred.data.cpu().numpy())
    labels = torch.gt(labels, 0.5)
    labels = np.ndarray.flatten(labels.data.cpu().numpy())
    num_correct = sum(correct == labels)
    return float(num_correct) / len(correct)
def weights_normal_init(model, dev=0.01):
    if isinstance(model, list):
        for m in model:
            weights_normal_init(m, dev)
    else:
        for m in model.modules():
            if isinstance(m, nn.Conv2d):
                # print(torch.sum(m.weight))
                m.weight.data.normal_(0.0, dev)
                if m.bias is not None:
                    m.bias.data.fill_(0.0)
            elif isinstance(m, nn.Linear):
                m.weight.data.normal_(0.0, dev)
def pretrain(self, x, pt_epochs, verbose=True):
    n = x.data.size()[0]
    num_batches = n // self.batch_size  # integer division so range() below works
    t = x

    # Pre-train 1 autoencoder at a time
    for i, ae_re in enumerate(self.autoencoders_ref):
        # Get the current autoencoder
        ae = getattr(self.sequential, ae_re)

        # Getting encoded output from the previous autoencoder
        if i > 0:
            # Set requires_grad to False so that backprop doesn't
            # travel all the way back to the previous autoencoder
            temp = Variable(torch.FloatTensor(n, ae.d_in), requires_grad=False)
            for k in range(num_batches):
                start, end = k * self.batch_size, (k + 1) * self.batch_size
                prev_ae = getattr(self.sequential, self.autoencoders_ref[i - 1])
                temp.data[start:end] = prev_ae.encode(t[start:end], add_noise=False).data
            t = temp

        optimizer = SGD(ae.parameters(), lr=self.pre_lr)

        # Pre-training
        print("Pre-training Autoencoder:", i)
        for ep in range(pt_epochs):
            agg_cost = 0.
            for k in range(num_batches):
                start, end = k * self.batch_size, (k + 1) * self.batch_size
                bt = t[start:end]
                optimizer.zero_grad()
                z = ae.encode(bt, add_noise=True)
                z = ae.decode(z)
                # binary cross-entropy reconstruction loss, summed over features
                loss = -torch.sum(bt * torch.log(z) + (1.0 - bt) * torch.log(1.0 - z), 1)
                cost = torch.mean(loss)
                cost.backward()
                optimizer.step()
                agg_cost += cost
            agg_cost /= num_batches
            if verbose:
                print("Pre-training Autoencoder:", i, "Epoch:", ep, "Cost:", agg_cost.data[0])
def forward(self, input1, input2, y):
    _output = input1.clone()
    _output.add_(-1, input2)
    _output.mul_(-1).mul_(y)
    _output.add_(self.margin)
    _output.cmax_(0)

    output = _output.sum()

    if self.size_average:
        output = output / y.size(0)

    self.save_for_backward(input1, input2, y)
    return input1.new((output,))
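This computes the margin ranking loss, max(0, -y * (x1 - x2) + margin). In current PyTorch the same value (for 1-D inputs, where dividing by y.size(0) equals the mean) is available as a functional; a hedged sketch:

import torch.nn.functional as F

loss = F.margin_ranking_loss(input1, input2, y, margin=self.margin,
                             reduction='mean' if self.size_average else 'sum')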
def updateOutput(self, input):
    assert input.dim() == 2
    input_size = input.size()

    self._output = self._output or input.new()
    self.norm = self.norm or input.new()
    self.buffer = self.buffer or input.new()

    self._output.resize_as_(input)

    # specialization for the infinity norm
    if self.p == float('inf'):
        if not self._indices:
            self._indices = torch.cuda.FloatTensor() if torch.typename(self.output) == 'torch.cuda.FloatTensor' \
                else torch.LongTensor()

        torch.abs(self.buffer, input)
        torch.max(self.norm, self._indices, self.buffer, 1)
        self.norm.add_(self.eps)
    else:
        self.normp = self.normp or input.new()
        if self.p % 2 != 0:
            torch.abs(self.buffer, input).pow_(self.p)
        else:
            torch.pow(self.buffer, input, self.p)

        torch.sum(self.normp, self.buffer, 1).add_(self.eps)
        torch.pow(self.norm, self.normp, 1. / self.p)

    torch.div(self._output, input, self.norm.view(-1, 1).expand_as(input))

    self.output = self._output.view(input_size)
    return self.output
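This legacy Normalize layer divides each row by its Lp norm plus eps. A rough modern equivalent (my substitution; F.normalize clamps by eps instead of adding it, so values differ slightly for near-zero rows):

import torch.nn.functional as F

out = F.normalize(input, p=self.p, dim=1, eps=self.eps)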
def updateGradInput(self, input, gradOutput):
    self.gradInput.resize_as_(input).zero_()
    size = list(input.size())
    size.insert(self.dim, 1)
    gradInput = self.gradInput.view(*size)
    torch.sum(gradInput, gradOutput, self.dim)
    return self.gradInput
def accGradParameters(self, input, gradOutput, scale=1):
    if self._input is None:
        self._input = input.new()
        self._gradWeight = input.new()
        self._sum = input.new()

    batchSize = input.size(0)
    contiguousView(self._input, input, batchSize, -1)
    contiguousView(self._gradOutput, gradOutput, batchSize, -1)
    self._gradWeight = self.gradWeight.view(1, -1)

    torch.mul(self._repeat, self._input, self._gradOutput)
    torch.sum(self._sum, self._repeat, 0)
    self._gradWeight.add_(scale, self._sum)
def updateOutput(self, input, target):
    # - log(input) * target - log(1 - input) * (1 - target)
    if input.nelement() != target.nelement():
        raise RuntimeError("input and target size mismatch")

    self.buffer = self.buffer or input.new()

    buffer = self.buffer
    weights = self.weights

    buffer.resize_as_(input)

    if weights is not None and target.dim() != 1:
        weights = self.weights.view(1, target.size(1)).expand_as(target)

    # log(input) * target
    torch.add(buffer, input, self.eps).log_()
    if weights is not None:
        buffer.mul_(weights)

    output = torch.dot(target, buffer)

    # log(1 - input) * (1 - target)
    torch.mul(buffer, input, -1).add_(1 + self.eps).log_()
    if weights is not None:
        buffer.mul_(weights)

    output = output + torch.sum(buffer)
    output = output - torch.dot(target, buffer)

    if self.sizeAverage:
        output = output / input.nelement()

    self.output = - output
    return self.output
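This is the legacy BCECriterion. Up to the eps stabilizer inside the logs, the same weighted loss is exposed in current PyTorch as a functional (a sketch, assuming `input` holds probabilities in (0, 1)):

import torch.nn.functional as F

out = F.binary_cross_entropy(input, target, weight=self.weights,
                             reduction='mean' if self.sizeAverage else 'sum')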