def pool_max(tensor, dim):
    return torch.max(tensor, dim)[0]
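A quick usage sketch of pool_max (names and shapes are illustrative): torch.max along a dimension returns a (values, indices) pair, and the trailing [0] keeps only the values.

import torch

x = torch.randn(4, 10, 8)   # batch of 4, sequence length 10, hidden size 8
pooled = pool_max(x, 1)     # max over the sequence dimension -> shape (4, 8)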
Python max() usage examples (source code)

Source: models.py — project: Structured-Self-Attentive-Sentence-Embedding, author: ExplorerFreda
def forward(self, inp, hidden):
    emb = self.drop(self.encoder(inp))
    outp = self.bilstm(emb, hidden)[0]
    if self.pooling == 'mean':
        outp = torch.mean(outp, 0).squeeze()
    elif self.pooling == 'max':
        # torch.max over the sequence dimension returns (values, indices); keep the values
        outp = torch.max(outp, 0)[0].squeeze()
    elif self.pooling == 'all' or self.pooling == 'all-word':
        outp = torch.transpose(outp, 0, 1).contiguous()
    return outp, emb
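The pooling branch can be exercised on its own; a minimal sketch, assuming outp has shape (seq_len, batch, 2 * nhid) as a bidirectional LSTM produces:

import torch

outp = torch.randn(10, 4, 16)
mean_pooled = torch.mean(outp, 0).squeeze()           # (4, 16)
max_pooled = torch.max(outp, 0)[0].squeeze()          # (4, 16)
all_words = torch.transpose(outp, 0, 1).contiguous()  # (4, 10, 16), one vector per word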
Source: models.py — project: Structured-Self-Attentive-Sentence-Embedding, author: ExplorerFreda
def __init__(self, config):
    super(Classifier, self).__init__()
    if config['pooling'] == 'mean' or config['pooling'] == 'max':
        self.encoder = BiLSTM(config)
        self.fc = nn.Linear(config['nhid'] * 2, config['nfc'])
    elif config['pooling'] == 'all':
        self.encoder = SelfAttentiveEncoder(config)
        self.fc = nn.Linear(config['nhid'] * 2 * config['attention-hops'], config['nfc'])
    else:
        raise Exception('Error when initializing Classifier')
    self.drop = nn.Dropout(config['dropout'])
    self.tanh = nn.Tanh()
    self.pred = nn.Linear(config['nfc'], config['class-number'])
    self.dictionary = config['dictionary']
    # self.init_weights()
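A hypothetical config for constructing the Classifier; the key names come from the snippet above, the values are illustrative only:

config = {
    'pooling': 'all',          # 'mean' / 'max' select the plain BiLSTM encoder instead
    'nhid': 300,
    'nfc': 512,
    'attention-hops': 4,
    'dropout': 0.5,
    'class-number': 5,
    'dictionary': None,        # placeholder; the project passes a word dictionary here
}
classifier = Classifier(config)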
def grad_variance(self):
    global_state = self._global_state
    beta = self._beta
    self._grad_var = np.array(0.0, dtype=np.float32)
    for group_id, group in enumerate(self._optimizer.param_groups):
        for p_id, p in enumerate(group['params']):
            if p.grad is None:
                continue
            grad = p.grad.data
            state = self._optimizer.state[p]
            if self._iter == 0:
                state["grad_avg"] = grad.new().resize_as_(grad).zero_()
                state["grad_avg_squared"] = 0.0
            state["grad_avg"].mul_(beta).add_(1 - beta, grad)
            self._grad_var += torch.sum(state["grad_avg"] * state["grad_avg"])
    if self._zero_debias:
        debias_factor = self.zero_debias_factor()
    else:
        debias_factor = 1.0
    # Var[g] ~= E[||g||^2] - ||E[g]||^2, with both moving averages zero-debiased
    self._grad_var /= -(debias_factor**2)
    self._grad_var += global_state['grad_norm_squared_avg'] / debias_factor
    # guard against negative variance: the two terms use different debias factors
    self._grad_var = max(self._grad_var, eps)  # eps: small module-level constant
    if self._sparsity_debias:
        self._grad_var *= self._sparsity_avg
    return
def get_mu(self):
    root = self.get_cubic_root()
    # dynamic range of the curvature estimates, clipped below at 1 + eps
    dr = max((self._h_max + eps) / (self._h_min + eps), 1.0 + eps)
    self._mu_t = max(root**2, ((np.sqrt(dr) - 1) / (np.sqrt(dr) + 1))**2)
    return
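For reference, grad_variance implements the identity Var[g] = E[||g||^2] - ||E[g]||^2; a NumPy-only sketch of the same estimate, with no optimizer state involved:

import numpy as np

g = np.random.randn(1000, 8)  # fake gradient samples
var_est = np.mean(np.sum(g**2, axis=1)) - np.sum(np.mean(g, axis=0)**2)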
def pad_batch(mini_batch):
    mini_batch_size = len(mini_batch)
    max_sent_len1 = int(np.max([len(x[0]) for x in mini_batch]))
    max_sent_len2 = int(np.max([len(x[1]) for x in mini_batch]))
    # max_token_len = int(np.mean([len(val) for sublist in mini_batch for val in sublist]))
    main_matrix1 = np.zeros((mini_batch_size, max_sent_len1), dtype=np.int64)
    main_matrix2 = np.zeros((mini_batch_size, max_sent_len2), dtype=np.int64)
    for idx1, i in enumerate(mini_batch):
        for idx2, j in enumerate(i[0]):
            try:
                # store token id j at [row idx1, position idx2]; the original
                # wrote main_matrix1[i, j], which silently left the matrix zero-filled
                main_matrix1[idx1, idx2] = j
            except IndexError:
                pass
    for idx1, i in enumerate(mini_batch):
        for idx2, j in enumerate(i[1]):
            try:
                main_matrix2[idx1, idx2] = j
            except IndexError:
                pass
    main_matrix1_t = Variable(torch.from_numpy(main_matrix1))
    main_matrix2_t = Variable(torch.from_numpy(main_matrix2))
    return [main_matrix1_t, main_matrix2_t]
# Alternatives kept commented out in the source:
# return [Variable(torch.cat((main_matrix1_t, main_matrix2_t), 0))]
# def pad_batch(mini_batch):
#     return [Variable(torch.from_numpy(np.asarray(_))) for _ in mini_batch[0]]
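A toy call of pad_batch as fixed above; each mini-batch element is a (sentence1, sentence2) pair of token-id lists, and Variable is the legacy torch.autograd wrapper the snippet relies on:

import numpy as np
import torch
from torch.autograd import Variable

mini_batch = [([1, 2, 3], [4, 5]), ([6], [7, 8, 9])]
m1, m2 = pad_batch(mini_batch)
print(m1.size())  # (2, 3): padded to the longest first sentence
print(m2.size())  # (2, 3): padded to the longest second sentence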
def load_data(resize):
    data_transforms = {
        'train': transforms.Compose([
            transforms.RandomSizedCrop(max(resize)),  # renamed RandomResizedCrop in newer torchvision
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        ]),
        'val': transforms.Compose([
            # higher scale-up for Inception-style inputs
            transforms.Scale(int(max(resize) / 224 * 256)),  # renamed Resize in newer torchvision
            transforms.CenterCrop(max(resize)),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        ]),
    }
    data_dir = 'PlantVillage'
    dsets = {x: datasets.ImageFolder(os.path.join(data_dir, x), data_transforms[x])
             for x in ['train', 'val']}
    dset_loaders = {x: torch.utils.data.DataLoader(dsets[x], batch_size=batch_size,  # batch_size is a module-level global
                                                   shuffle=True)
                    for x in ['train', 'val']}
    dset_sizes = {x: len(dsets[x]) for x in ['train', 'val']}
    dset_classes = dsets['train'].classes
    return dset_loaders['train'], dset_loaders['val']
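A hypothetical call; it expects a PlantVillage/{train,val} folder layout on disk and the module-level batch_size noted above:

train_loader, val_loader = load_data((224, 224))
for images, labels in train_loader:
    # images: (batch_size, 3, 224, 224) after the transforms above
    break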
def vec_to_classnum(onehot):
    # index of the maximum entry along the last dimension, i.e. the class id
    return torch.max(onehot, -1)[1][0]
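A quick check (a sketch; note the leading batch dimension, which the trailing [0] selects from):

import torch

onehot = torch.tensor([[0., 0., 1.]])  # batch of one, class 2
print(vec_to_classnum(onehot))         # tensor(2)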
def evaluate(model, testloader, args, use_cuda=False):
    correct = 0
    total = 0
    class_correct = list(0. for i in range(2))
    class_total = list(0. for i in range(2))
    model.eval()  # hoisted out of the loop; the original called it every iteration
    for i, data in enumerate(testloader, 0):
        inputs, targets = data
        inputs = inputs.unsqueeze(1)
        targets = target_onehot_to_classnum_tensor(targets)
        if use_cuda and cuda_ava:
            inputs = Variable(inputs.float().cuda())
            targets = targets.cuda()
        else:
            inputs = Variable(inputs.float())
        outputs = model(inputs)
        # torch.max over dim 1 returns (values, indices); the indices are the predictions
        _, predicted = torch.max(outputs.data, 1)
        total += targets.size(0)
        correct += (predicted == targets).sum()
        c = (predicted == targets).squeeze()
        for j in range(args.batch_size):  # assumes full batches; 'j' avoids shadowing the loop index
            target = targets[j]
            class_correct[target] += c[j]
            class_total[target] += 1
    print("Accuracy of the network is: %.5f %%" % (correct / total * 100))
    for i in range(2):
        if class_total[i] == 0:
            print("Accuracy of %1s : %1s %% (%1d / %1d)" % (classes[i], "NaN", class_correct[i], class_total[i]))
        else:
            print("Accuracy of %1s : %.5f %% (%1d / %1d)" % (classes[i], class_correct[i] / class_total[i] * 100, class_correct[i], class_total[i]))
    return correct / total
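The prediction idiom used above, in isolation (torch.max over dim 1 yields the same indices as an argmax over classes):

import torch

outputs = torch.randn(4, 2)           # logits for a batch of 4, 2 classes
_, predicted = torch.max(outputs, 1)  # same indices as outputs.argmax(dim=1)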
def evaluate(model, testloader, args, use_cuda=False):
    correct = 0
    total = 0
    class_correct = list(0. for i in range(3))
    class_total = list(0. for i in range(3))
    for i, data in enumerate(testloader, 0):
        if i == 100:
            break  # evaluate on at most 100 batches
        inputs, targets = data
        inputs = inputs.unsqueeze(1)
        targets = target_onehot_to_classnum_tensor(targets)
        if use_cuda and cuda_ava:
            inputs = Variable(inputs.float().cuda())
            targets = targets.cuda()
        else:
            inputs = Variable(inputs.float())
        outputs = model(inputs)
        _, predicted = torch.max(outputs.data, 1)
        total += targets.size(0)
        correct += (predicted == targets).sum()
        c = (predicted == targets).squeeze()
        for j in range(args.batch_size):  # assumes full batches
            target = targets[j]
            class_correct[target] += c[j]
            class_total[target] += 1
    print("Accuracy of the network is: %.5f %%" % (correct / total * 100))
    for i in range(3):
        if class_total[i] == 0:
            print("Accuracy of %1s : %1s %% (%1d / %1d)" % (classes[i], "NaN", class_correct[i], class_total[i]))
        else:
            print("Accuracy of %1s : %.5f %% (%1d / %1d)" % (classes[i], class_correct[i] / class_total[i] * 100, class_correct[i], class_total[i]))
    return correct / total
def evaluate(model, testloader, args, use_cuda=False):
    correct = 0
    total = 0
    class_correct = list(0. for i in range(2))
    class_total = list(0. for i in range(2))
    model.eval()  # hoisted out of the loop; the original called it every iteration
    for i, data in enumerate(testloader, 0):
        if i == 20:
            break  # evaluate on at most 20 batches
        inputs, targets = data
        inputs = inputs.unsqueeze(1)
        targets = target_onehot_to_classnum_tensor(targets)
        if use_cuda and cuda_ava:
            inputs = Variable(inputs.float().cuda())
            targets = targets.cuda()
        else:
            inputs = Variable(inputs.float())
        outputs = model(inputs)
        _, predicted = torch.max(outputs.data, 1)
        total += targets.size(0)
        correct += (predicted == targets).sum()
        c = (predicted == targets).squeeze()
        for j in range(args.batch_size):  # assumes full batches
            target = targets[j]
            class_correct[target] += c[j]
            class_total[target] += 1
    print("Accuracy of the network is: %.5f %%" % (correct / total * 100))
    for i in range(2):
        if class_total[i] == 0:
            print("Accuracy of %1s : %1s %% (%1d / %1d)" % (classes[i], "NaN", class_correct[i], class_total[i]))
        else:
            print("Accuracy of %1s : %.5f %% (%1d / %1d)" % (classes[i], class_correct[i] / class_total[i] * 100, class_correct[i], class_total[i]))
    return correct / total