def lr_grad_norm_avg(self):
    """Update the smoothed log-statistics used to bound ``lr * grad_norm``.

    This is for enforcing lr * grad_norm not increasing dramatically in
    case of instability. Not necessary for basic use.

    Reads ``grad_norm_squared`` from ``self._global_state`` and writes three
    entries back into that dict:

    * ``grad_norm_squared_avg_log`` -- moving average (rate ``self._beta``)
      of ``log(grad_norm_squared + eps)``;
    * ``lr_grad_norm_avg`` -- moving average of
      ``log(self._lr * sqrt(grad_norm_squared) + eps)``;
    * ``lr_grad_norm_avg_min`` -- running minimum of
      ``exp(lr_grad_norm_avg / self.zero_debias_factor())``.

    Both averages start from 0.0 on the first call.
    """
    state = self._global_state
    decay = self._beta
    # The presence of "lr_grad_norm_avg" doubles as the "already set up" flag.
    is_first_update = "lr_grad_norm_avg" not in state

    if is_first_update:
        state['grad_norm_squared_avg_log'] = 0.0
    log_grad_sq = np.log(state['grad_norm_squared'] + eps)
    state['grad_norm_squared_avg_log'] = \
        state['grad_norm_squared_avg_log'] * decay + (1 - decay) * log_grad_sq

    log_lr_grad = np.log(self._lr * np.sqrt(state['grad_norm_squared']) + eps)
    previous = 0.0 if is_first_update else state["lr_grad_norm_avg"]
    state["lr_grad_norm_avg"] = previous * decay + (1 - decay) * log_lr_grad

    # we monitor the minimal smoothed ||lr * grad||
    debiased = np.exp(state["lr_grad_norm_avg"] / self.zero_debias_factor())
    if is_first_update:
        state["lr_grad_norm_avg_min"] = debiased
    else:
        state["lr_grad_norm_avg_min"] = min(state["lr_grad_norm_avg_min"], debiased)
# Example source code using Python's min()
def update_hyper_param(self):
    """Write the current momentum and learning rate into each param group.

    For every group in ``self._optimizer.param_groups``:

    * ``momentum`` is set to ``self._mu_t``;
    * when ``self._force_non_inc_step == False``, ``lr`` becomes
      ``self._lr_t * self._lr_factor``; if ``self._adapt_clip`` is set and
      ``lr * sqrt(grad_norm_squared)`` reaches
      ``self._catastrophic_move_thresh``, ``lr`` is replaced by
      ``thresh / sqrt(grad_norm_squared + eps)``;
    * otherwise, once ``self._iter > self._curv_win_width``, ``lr`` is capped
      by twice the tracked minimum of the smoothed ``lr * grad_norm`` divided
      by the debiased smoothed gradient norm.

    Returns None.
    """
    for group in self._optimizer.param_groups:
        group['momentum'] = self._mu_t

        # Equality test (not truthiness) is kept on purpose so that values
        # other than False fall through to the constrained branch.
        if self._force_non_inc_step == False:
            group['lr'] = self._lr_t * self._lr_factor
            if not self._adapt_clip:
                continue
            # a loose clamping to prevent catastrophically large move: when
            # lr * ||grad|| reaches the threshold, lr is rescaled so that the
            # product stays at (about) the threshold.
            grad_sq = self._global_state['grad_norm_squared']
            if group['lr'] * np.sqrt(grad_sq) >= self._catastrophic_move_thresh:
                group['lr'] = self._catastrophic_move_thresh / np.sqrt(
                    self._global_state['grad_norm_squared'] + eps)
                if self._verbose:
                    logging.warning("clip catastropic move!")
            continue

        if self._iter > self._curv_win_width:
            # force to guarantee lr * grad_norm not increasing dramatically.
            # Not necessary for basic use. Please refer to the comments
            # in YFOptimizer.__init__ for more details
            self.lr_grad_norm_avg()
            debias_factor = self.zero_debias_factor()
            smoothed_grad_norm = np.sqrt(
                np.exp(self._global_state['grad_norm_squared_avg_log'] / debias_factor))
            group['lr'] = min(
                self._lr * self._lr_factor,
                2.0 * self._global_state["lr_grad_norm_avg_min"] / (smoothed_grad_norm + eps))
    return
def update_hyper_param(self):
    """Write momentum and a bounded learning rate into each param group.

    For every group in ``self._optimizer.param_groups``:

    * ``momentum`` is set to ``self._mu``;
    * when ``self._force_non_inc_step == False``, ``lr`` is the smaller of
      ``self._lr * self._lr_factor`` and
      ``self._lr_grad_norm_thresh / (sqrt(grad_norm_squared) + eps)``;
    * otherwise, once ``self._iter > self._curv_win_width``, ``lr`` is capped
      by twice the tracked minimum of the smoothed ``lr * grad_norm`` divided
      by the debiased smoothed gradient norm.

    Returns None.
    """
    for group in self._optimizer.param_groups:
        group['momentum'] = self._mu

        # Equality test (not truthiness) is kept so that values other than
        # False take the constrained branch below.
        if self._force_non_inc_step == False:
            base_lr = self._lr * self._lr_factor
            grad_norm = math.sqrt(self._global_state["grad_norm_squared"])
            group['lr'] = min(base_lr, self._lr_grad_norm_thresh / (grad_norm + eps))
            continue

        if self._iter > self._curv_win_width:
            # force to guarantee lr * grad_norm not increasing dramatically.
            # Not necessary for basic use. Please refer to the comments
            # in YFOptimizer.__init__ for more details
            self.lr_grad_norm_avg()
            debias_factor = self.zero_debias_factor()
            smoothed_grad_norm = np.sqrt(
                np.exp(self._global_state['grad_norm_squared_avg_log'] / debias_factor))
            group['lr'] = min(
                self._lr * self._lr_factor,
                2.0 * self._global_state["lr_grad_norm_avg_min"] / (smoothed_grad_norm + eps))
    return
def intersect(box_a, box_b):
    """Compute the pairwise intersection area between two sets of boxes.

    Columns 0-1 of each box are treated as its minimum corner and columns
    2-3 as its maximum corner. Both corner sets are expanded (as views, no
    new allocation) to a common [A,B,2] shape:
        [A,2] -> [A,1,2] -> [A,B,2]
        [B,2] -> [1,B,2] -> [A,B,2]
    Args:
        box_a: (tensor) bounding boxes, Shape: [A,4].
        box_b: (tensor) bounding boxes, Shape: [B,4].
    Return:
        (tensor) intersection area, Shape: [A,B].
    """
    num_a, num_b = box_a.size(0), box_b.size(0)

    a_upper = box_a[:, 2:].unsqueeze(1).expand(num_a, num_b, 2)
    b_upper = box_b[:, 2:].unsqueeze(0).expand(num_a, num_b, 2)
    a_lower = box_a[:, :2].unsqueeze(1).expand(num_a, num_b, 2)
    b_lower = box_b[:, :2].unsqueeze(0).expand(num_a, num_b, 2)

    upper = torch.min(a_upper, b_upper)
    lower = torch.max(a_lower, b_lower)
    # Non-overlapping pairs give a negative extent; clamp it to zero.
    extent = torch.clamp(upper - lower, min=0)
    return extent[:, :, 0] * extent[:, :, 1]
def intersect(box_a, box_b):
    """Return the [A,B] matrix of intersection areas between two box sets.

    Each box is read as (min corner, max corner): columns 0-1 and 2-3.
    The corners are broadcast to [A,B,2] through unsqueeze + expand, which
    creates views rather than copies.
    Args:
        box_a: (tensor) bounding boxes, Shape: [A,4].
        box_b: (tensor) bounding boxes, Shape: [B,4].
    Return:
        (tensor) intersection area, Shape: [A,B].
    """
    A = box_a.size(0)
    B = box_b.size(0)

    def pairwise(corners_a, corners_b):
        # [A,2] -> [A,B,2] and [B,2] -> [A,B,2]
        return (corners_a.unsqueeze(1).expand(A, B, 2),
                corners_b.unsqueeze(0).expand(A, B, 2))

    top_right = torch.min(*pairwise(box_a[:, 2:], box_b[:, 2:]))
    bottom_left = torch.max(*pairwise(box_a[:, :2], box_b[:, :2]))
    sides = torch.clamp(top_right - bottom_left, min=0)
    return sides[:, :, 0] * sides[:, :, 1]
def intersect(box_a, box_b):
    """Area of overlap for every (box_a[i], box_b[j]) pair.

    The boxes' min corners (columns 0-1) and max corners (columns 2-3) are
    expanded to [A,B,2] views, the overlap rectangle is formed from the
    elementwise max of the min corners and min of the max corners, and
    negative side lengths are clamped to zero.
    Args:
        box_a: (tensor) bounding boxes, Shape: [A,4].
        box_b: (tensor) bounding boxes, Shape: [B,4].
    Return:
        (tensor) intersection area, Shape: [A,B].
    """
    rows = box_a.size(0)
    cols = box_b.size(0)
    target = (rows, cols, 2)

    hi_corner = torch.min(
        box_a[:, 2:].unsqueeze(1).expand(*target),
        box_b[:, 2:].unsqueeze(0).expand(*target),
    )
    lo_corner = torch.max(
        box_a[:, :2].unsqueeze(1).expand(*target),
        box_b[:, :2].unsqueeze(0).expand(*target),
    )
    overlap = torch.clamp(hi_corner - lo_corner, min=0)
    width = overlap[:, :, 0]
    height = overlap[:, :, 1]
    return width * height
def intersect(box_a, box_b):
    """Pairwise intersection areas of two sets of boxes.

    Columns 0-1 are used as each box's min corner, columns 2-3 as its max
    corner. Corners are resized to [A,B,2] without new allocation:
        [A,2] -> [A,1,2] -> [A,B,2]
        [B,2] -> [1,B,2] -> [A,B,2]
    Args:
        box_a: (tensor) bounding boxes, Shape: [A,4].
        box_b: (tensor) bounding boxes, Shape: [B,4].
    Return:
        (tensor) intersection area, Shape: [A,B].
    """
    count_a, count_b = box_a.size(0), box_b.size(0)

    maxes_a = box_a[:, 2:].unsqueeze(1).expand(count_a, count_b, 2)
    maxes_b = box_b[:, 2:].unsqueeze(0).expand(count_a, count_b, 2)
    inner_max = torch.min(maxes_a, maxes_b)

    mins_a = box_a[:, :2].unsqueeze(1).expand(count_a, count_b, 2)
    mins_b = box_b[:, :2].unsqueeze(0).expand(count_a, count_b, 2)
    inner_min = torch.max(mins_a, mins_b)

    # Zero out the side lengths of pairs that do not overlap.
    span = torch.clamp(inner_max - inner_min, min=0)
    return span[:, :, 0] * span[:, :, 1]
def cosine_similarity(x1, x2, dim=1, eps=1e-8):
    r"""Returns cosine similarity between x1 and x2, computed along dim.

    The result is ``sum(x1 * x2) / max(||x1||_2 * ||x2||_2, eps)`` where the
    sum and the norms are taken over ``dim``.

    Args:
        x1 (Variable): First input.
        x2 (Variable): Second input (of size matching x1).
        dim (int, optional): Dimension of vectors. Default: 1
        eps (float, optional): Small value to avoid division by zero. Default: 1e-8
    Shape:
        - Input: :math:`(\ast_1, D, \ast_2)` where D is at position `dim`.
        - Output: :math:`(\ast_1, \ast_2)`, i.e. the input shape with
          dimension `dim` removed (assumes the reductions below drop the
          reduced dimension, as current PyTorch does).
    """
    w12 = torch.sum(x1 * x2, dim)
    w1 = torch.norm(x1, 2, dim)
    w2 = torch.norm(x2, 2, dim)
    # No trailing argument-less squeeze(): it would also remove unrelated
    # size-1 dimensions (e.g. a batch of one), contradicting the documented
    # output shape.
    return w12 / (w1 * w2).clamp(min=eps)
def normalize(input, p=2, dim=1, eps=1e-12):
    r"""Performs :math:`L_p` normalization of inputs over specified dimension.

    Each subtensor v along ``dim`` is divided by its p-norm, with the norm
    floored at ``eps``:

    .. math::
        v = \frac{v}{\max(\lVert v \rVert_p, \epsilon)}

    The norm is a vector norm over ``dim``, not a matrix norm. With default
    arguments this normalizes over the second dimension with Euclidean norm.

    Args:
        input: input tensor of any shape
        p (float): the exponent value in the norm formulation
        dim (int): the dimension to reduce
        eps (float): small value to avoid division by zero
    """
    # keepdim=True leaves a size-1 axis at ``dim`` so it can be expanded back.
    norms = input.norm(p, dim, True)
    safe_norms = norms.clamp(min=eps)
    return input / safe_norms.expand_as(input)
def intersect(box_a, box_b):
    """Intersection area between every box in box_a and every box in box_b.

    Both inputs are viewed as [A,B,2] corner tensors without copying
    ([A,2] -> [A,1,2] -> [A,B,2] and [B,2] -> [1,B,2] -> [A,B,2]); columns
    0-1 act as min corners and columns 2-3 as max corners.
    Args:
        box_a: (tensor) bounding boxes, Shape: [A,4].
        box_b: (tensor) bounding boxes, Shape: [B,4].
    Return:
        (tensor) intersection area, Shape: [A,B].
    """
    n_a = box_a.size(0)
    n_b = box_b.size(0)

    def tile_a(corners):
        return corners.unsqueeze(1).expand(n_a, n_b, 2)

    def tile_b(corners):
        return corners.unsqueeze(0).expand(n_a, n_b, 2)

    far_corner = torch.min(tile_a(box_a[:, 2:]), tile_b(box_b[:, 2:]))
    near_corner = torch.max(tile_a(box_a[:, :2]), tile_b(box_b[:, :2]))
    edges = torch.clamp(far_corner - near_corner, min=0)
    return edges[:, :, 0] * edges[:, :, 1]
def normalize(input, p=2, dim=1, eps=1e-12):
    r"""Performs :math:`L_p` normalization of inputs over specified dimension.

    Computes, for each subtensor v over dimension dim of input,

    .. math::
        v = \frac{v}{\max(\lVert v \rVert_p, \epsilon)}

    where :math:`\lVert v \rVert_p` is a vector norm (not a matrix norm).
    With default arguments normalizes over the second dimension with
    Euclidean norm.

    Args:
        input: input tensor of any shape
        p (float): the exponent value in the norm formulation. Default: 2
        dim (int): the dimension to reduce. Default: 1
        eps (float): small value to avoid division by zero. Default: 1e-12
    """
    keep_reduced_axis = True
    denominator = input.norm(p, dim, keep_reduced_axis).clamp(min=eps)
    # Expand the size-1 axis back to the input's shape before dividing.
    denominator = denominator.expand_as(input)
    return input / denominator
def get_mean_and_std(dataset, max_load=10000):
    """Estimate per-channel mean and std from repeated ``dataset.load(1)`` calls.

    ``dataset.load(1)`` is called ``min(max_load, len(dataset))`` times; its
    first return value is indexed as ``im[:, j, :, :]`` for j in 0..2. The
    per-call channel means and stds are averaged over the number of calls.
    The loop index is printed on every iteration.

    Args:
        dataset: object supporting ``len()`` and ``load(1)`` returning a
            3-tuple whose first element is a 4-D tensor with at least three
            channels on axis 1 (presumably a batch of images -- confirm
            against the dataset class).
        max_load: upper bound on the number of ``load`` calls.

    Returns:
        (mean, std): two tensors of 3 elements each.
    """
    num_channels = 3
    mean = torch.zeros(num_channels)
    std = torch.zeros(num_channels)
    print('==> Computing mean and std..')
    num_samples = min(max_load, len(dataset))
    for sample_idx in range(num_samples):
        print(sample_idx)
        batch, _, _ = dataset.load(1)
        for channel in range(num_channels):
            plane = batch[:, channel, :, :]
            mean[channel] += plane.mean()
            std[channel] += plane.std()
    mean.div_(num_samples)
    std.div_(num_samples)
    return mean, std
def normalize(input, p=2, dim=1, eps=1e-12):
    r"""Performs :math:`L_p` normalization of inputs over specified dimension.

    Every subtensor v taken along ``dim`` is rescaled as

    .. math::
        v = \frac{v}{\max(\lVert v \rVert_p, \epsilon)}

    using the vector p-norm of v (not a matrix norm). The defaults give
    Euclidean normalization over the second dimension.

    Args:
        input: input tensor of any shape
        p (float): the exponent value in the norm formulation. Default: 2
        dim (int): the dimension to reduce. Default: 1
        eps (float): small value to avoid division by zero. Default: 1e-12
    """
    p_norm = input.norm(p, dim, True)
    # Flooring at eps keeps all-zero subtensors from dividing by zero.
    floored = p_norm.clamp(min=eps)
    scale = floored.expand_as(input)
    return input / scale
def min(x, axis=None, keepdims=False):
    """Build and apply a minimum-along-axis op through ``get_op``.

    Two helpers are handed to ``get_op`` (defined elsewhere in this module):
    ``_min`` performs the reduction and ``_compute_output_shape`` reports the
    resulting shape. ``axis`` and ``keepdims`` are forwarded via
    ``arguments``.
    """
    def _min(x, axis, keepdims):
        reduced = torch.min(x, axis)[0]
        # Since keepdims argument of torch not functional
        if keepdims:
            return reduced
        return torch.squeeze(reduced, axis)

    def _compute_output_shape(x, axis, keepdims):
        # A full reduction is reported as a scalar shape.
        if axis is None:
            return ()

        dims = list(_get_shape(x))
        if not keepdims:
            del dims[axis]
        else:
            dims[axis] = 1
        return tuple(dims)

    op = get_op(_min, output_shape=_compute_output_shape, arguments=[axis, keepdims])
    return op(x)
def lr_grad_norm_avg(self):
    """Advance the moving averages that bound ``lr * grad_norm``.

    This is for enforcing lr * grad_norm not increasing dramatically in
    case of instability. Not necessary for basic use.

    Using ``grad_norm_squared`` from ``self._global_state`` it updates, in
    that same dict, ``grad_norm_squared_avg_log`` (smoothed
    ``log(grad_norm_squared + eps)``), ``lr_grad_norm_avg`` (smoothed
    ``log(lr * sqrt(grad_norm_squared) + eps)``) and ``lr_grad_norm_avg_min``
    (running minimum of the debiased, exponentiated ``lr_grad_norm_avg``).
    On the first call both averages are seeded with 0.0.
    """
    stats = self._global_state
    beta = self._beta

    def smooth(previous, sample):
        # One exponential-moving-average step with rate beta.
        return previous * beta + (1 - beta) * sample

    seen_before = "lr_grad_norm_avg" in stats
    if not seen_before:
        stats['grad_norm_squared_avg_log'] = 0.0
    stats['grad_norm_squared_avg_log'] = smooth(
        stats['grad_norm_squared_avg_log'],
        np.log(stats['grad_norm_squared'] + eps))

    lr_grad_log = np.log(self._lr * np.sqrt(stats['grad_norm_squared']) + eps)
    if seen_before:
        stats["lr_grad_norm_avg"] = smooth(stats["lr_grad_norm_avg"], lr_grad_log)
        stats["lr_grad_norm_avg_min"] = min(
            stats["lr_grad_norm_avg_min"],
            np.exp(stats["lr_grad_norm_avg"] / self.zero_debias_factor()))
    else:
        stats["lr_grad_norm_avg"] = smooth(0.0, lr_grad_log)
        # we monitor the minimal smoothed ||lr * grad||
        stats["lr_grad_norm_avg_min"] = \
            np.exp(stats["lr_grad_norm_avg"] / self.zero_debias_factor())
def logaddexp(x1: T.FloatTensor, x2: T.FloatTensor) -> T.FloatTensor:
    """
    Elementwise logaddexp function: log(exp(x1) + exp(x2))

    Factoring out the larger argument gives
        log(exp(x1) + exp(x2)) = max(x1, x2) + log(1 + exp(-|x1 - x2|)),
    so the exponent passed to ``exp`` is never positive.

    Args:
        x1: A tensor.
        x2: A tensor.

    Returns:
        tensor: Elementwise logaddexp.

    """
    # min(x2 - x1, x1 - x2) is the non-positive gap between the two inputs.
    negative_gap = torch.min(x2 - x1, x1 - x2)
    larger = torch.max(x1, x2)
    correction = torch.log1p(exp(negative_gap))
    return larger + correction
def tmin(x: T.FloatTensor,
         axis: int = None,
         keepdims: bool = False) -> T.FloatingPoint:
    """
    Return the minimum of a tensor, overall or along the specified axis.

    Args:
        x: A float or tensor.
        axis (optional): The axis for taking the minimum.
        keepdims (optional): If this is set to true, the dimension of the tensor
                             is unchanged. Otherwise, the reduced axis is removed
                             and the dimension of the array is 1 less.

    Returns:
        if axis is None:
            float: The overall minimum of the elements in the tensor
        else:
            tensor: The minimum of the tensor along the specified axis.

    """
    if axis is None:
        return x.min()
    # With ``dim`` given, ``min`` returns a pair; element 0 holds the values.
    reduced = x.min(dim=axis, keepdim=keepdims)
    return reduced[0]
def get_update(self):
    """Compute one combined loss, backpropagate it and return cloned gradients.

    The loss is the sum of three terms built from ``self._sample()``:
    a weighted mean-squared critic error, a negated weighted mean entropy,
    and a policy term ``-mean(min(ratio * adv, clamp(ratio) * adv))`` where
    ``ratio = exp(new_log_prob - old_log_prob)`` is clamped to
    ``[1 - self.clip, 1 + self.clip]``. Running totals are accumulated in
    ``self.stats``.

    Returns:
        list: a clone of ``p.grad`` for every ``p`` in ``self.parameters()``.
    """
    (actions, log_actions, rewards, critics,
     entropies, states, advantages) = self._sample()

    # Compute auxiliary losses (critic values are re-evaluated on the states)
    critics = self.critic(states)
    critic_loss = self.critic_weight * (rewards - critics).pow(2).mean()
    entropy_loss = - self.entropy_weight * entropies.mean()

    # Compute policy loss
    advantages = advantages.detach().view(-1, 1)
    new_actions = self.policy(states)
    log_probs = new_actions.compute_log_prob(actions)
    ratios = (log_probs - log_actions.detach()).exp()
    surr = ratios.view(-1, 1) * advantages
    clipped_ratios = th.clamp(ratios, 1.0 - self.clip, 1.0 + self.clip)
    clipped = clipped_ratios.view(-1, 1) * advantages
    policy_loss = - th.min(surr, clipped).mean()

    # Proceed to optimization; the graph is kept unless epoch_optimized
    # equals num_epochs.
    loss = policy_loss + critic_loss + entropy_loss
    is_final_epoch = self.epoch_optimized == self.num_epochs
    loss.backward(retain_graph=not is_final_epoch)

    if self.grad_clip > 0.0:
        th.nn.utils.clip_grad_norm(self.parameters(), self.grad_clip)

    # Store statistics
    self.stats['Num. Updates'] += 1.0
    tracked = (
        ('Critic Loss', critic_loss),
        ('Entropy Loss', entropy_loss),
        ('Policy Loss', policy_loss),
        ('Total Loss', loss),
    )
    for label, term in tracked:
        self.stats[label] += term.data[0]
    return [p.grad.clone() for p in self.parameters()]
def get_lr(self):
    """Recompute ``self._lr_t`` from ``self._mu_t`` and ``self._h_min``.

    ``_lr_t`` is first set to ``(1 - sqrt(mu_t))**2 / (h_min + eps)`` and
    then limited to at most that value scaled by
    ``(iter + 1) / (10 * curv_win_width)``. Returns None.
    """
    momentum_gap = 1.0 - math.sqrt(self._mu_t)
    self._lr_t = momentum_gap**2 / (self._h_min + eps)
    # slow start of lr to prevent huge lr when there is only a few iteration finished
    ramped = self._lr_t * (self._iter + 1) / float(10.0 * self._curv_win_width)
    self._lr_t = min(self._lr_t, ramped)
    return
def updateOutput(self, input):
    """Compute the minimum of ``input`` along the module's dimension.

    ``torch.min`` is called with ``self._output`` and ``self._indices`` as
    its leading arguments (presumably the result buffers of the legacy
    positional form -- confirm against the torch version in use).
    ``self.output`` is then pointed at the result: for inputs with more than
    one dimension, at index 0 along the reduced dimension of
    ``self._output``; otherwise at ``self._output`` itself.
    """
    self._lazyInit()
    dim = self._getPositiveDimension(input)
    torch.min(self._output, self._indices, input, dim)
    result = self._output.select(dim, 0) if input.dim() > 1 else self._output
    self.output.set_(result)
    return self.output
def type(self, type, tensorCache=None):
    """Convert the module to ``type`` while keeping ``_indices`` an index tensor.

    ``self._indices`` is detached before delegating to the parent class and
    converted separately afterwards: to ``'torch.cuda.LongTensor'`` when
    ``type`` is ``'torch.cuda.FloatTensor'``, otherwise via ``.long()``.
    A missing (``None``) index buffer stays ``None``.

    Args:
        type: target tensor type name, forwarded to the parent ``type``.
        tensorCache: forwarded unchanged to the parent ``type``.

    Returns:
        self
    """
    # torch.min expects a LongTensor as indices, whereas cutorch.max expects a CudaTensor.
    # self._indices must be a LongTensor. Setting it to None temporarily avoids
    # unnecessary memory allocations.
    indices, self._indices = self._indices, None
    super(Min, self).type(type, tensorCache)
    # Compare against None explicitly: the truth value of a tensor is
    # ambiguous for multi-element tensors and would drop an empty buffer.
    if indices is None:
        self._indices = None
    elif type == 'torch.cuda.FloatTensor':
        self._indices = indices.type('torch.cuda.LongTensor')
    else:
        self._indices = indices.long()
    return self
def test_min(self):
    """Run the shared selection check with ``torch.min`` and ``min``."""
    tensor_op, reference_op = torch.min, min
    self._testSelection(tensor_op, reference_op)
def test_cmin(self):
    """Run the shared elementwise-selection check with ``torch.cmin`` and ``min``."""
    tensor_op, reference_op = torch.cmin, min
    self._testCSelection(tensor_op, reference_op)
def test_clamp(self):
    """Compare in-place ``clamp_`` against a per-element max/min reference."""
    lower, upper = -1, 1
    m1 = torch.rand(100).mul(5).add(-2.5)  # uniform in [-2.5, 2.5]
    # just in case we're extremely lucky: plant both bounds in the data.
    m1[1] = lower
    m1[2] = upper

    res1 = m1.clone()
    res1.clamp_(lower, upper)

    # Reference result, clamped one element at a time.
    res2 = m1.clone()
    for i in iter_indices(res2):
        capped = min(upper, res2[i])
        res2[i] = max(lower, capped)
    self.assertEqual(res1, res2)
def bbox_iou(box1, box2, x1y1x2y2=True):
    """Return the intersection-over-union of two boxes.

    Args:
        box1, box2: indexable with four entries. With ``x1y1x2y2=True`` they
            are read as (x1, y1, x2, y2); otherwise as
            (center_x, center_y, width, height).
        x1y1x2y2: selects the box format.

    Returns:
        0.0 when the overlap width or height is not positive; otherwise the
        overlap area divided by the union area.
    """
    if x1y1x2y2:
        left = min(box1[0], box2[0])
        right = max(box1[2], box2[2])
        top = min(box1[1], box2[1])
        bottom = max(box1[3], box2[3])
        w1, h1 = box1[2] - box1[0], box1[3] - box1[1]
        w2, h2 = box2[2] - box2[0], box2[3] - box2[1]
    else:
        left = min(box1[0]-box1[2]/2.0, box2[0]-box2[2]/2.0)
        right = max(box1[0]+box1[2]/2.0, box2[0]+box2[2]/2.0)
        top = min(box1[1]-box1[3]/2.0, box2[1]-box2[3]/2.0)
        bottom = max(box1[1]+box1[3]/2.0, box2[1]+box2[3]/2.0)
        w1, h1 = box1[2], box1[3]
        w2, h2 = box2[2], box2[3]

    # Overlap extent = sum of the two extents minus the enclosing extent.
    overlap_w = w1 + w2 - (right - left)
    overlap_h = h1 + h2 - (bottom - top)
    if overlap_w <= 0 or overlap_h <= 0:
        return 0.0

    overlap_area = overlap_w * overlap_h
    union_area = w1 * h1 + w2 * h2 - overlap_area
    return overlap_area/union_area
def bbox_ious(boxes1, boxes2, x1y1x2y2=True):
    """Elementwise intersection-over-union for two stacks of boxes.

    Args:
        boxes1, boxes2: tensors indexed along their first axis by coordinate,
            i.e. ``boxes[0]`` .. ``boxes[3]`` each hold one coordinate for all
            boxes. With ``x1y1x2y2=True`` the rows are (x1, y1, x2, y2);
            otherwise (center_x, center_y, width, height).
        x1y1x2y2: selects the box format.

    Returns:
        Tensor of overlap area divided by union area, with the overlap set to
        0 wherever the overlap width or height is not positive.
    """
    if x1y1x2y2:
        left = torch.min(boxes1[0], boxes2[0])
        right = torch.max(boxes1[2], boxes2[2])
        top = torch.min(boxes1[1], boxes2[1])
        bottom = torch.max(boxes1[3], boxes2[3])
        w1, h1 = boxes1[2] - boxes1[0], boxes1[3] - boxes1[1]
        w2, h2 = boxes2[2] - boxes2[0], boxes2[3] - boxes2[1]
    else:
        left = torch.min(boxes1[0]-boxes1[2]/2.0, boxes2[0]-boxes2[2]/2.0)
        right = torch.max(boxes1[0]+boxes1[2]/2.0, boxes2[0]+boxes2[2]/2.0)
        top = torch.min(boxes1[1]-boxes1[3]/2.0, boxes2[1]-boxes2[3]/2.0)
        bottom = torch.max(boxes1[1]+boxes1[3]/2.0, boxes2[1]+boxes2[3]/2.0)
        w1, h1 = boxes1[2], boxes1[3]
        w2, h2 = boxes2[2], boxes2[3]

    # Overlap extent = sum of the two extents minus the enclosing extent.
    overlap_w = w1 + w2 - (right - left)
    overlap_h = h1 + h2 - (bottom - top)
    no_overlap = (overlap_w <= 0) | (overlap_h <= 0)

    overlap_area = overlap_w * overlap_h
    overlap_area.masked_fill_(no_overlap, 0)
    union_area = w1 * h1 + w2 * h2 - overlap_area
    return overlap_area/union_area
def updateOutput(self, input):
    """Compute the minimum of ``input`` along the module's dimension.

    ``torch.min`` writes values and indices into ``self._output`` and
    ``self._indices`` through ``out=``. ``self.output`` is then pointed at
    the result: for inputs with more than one dimension, at index 0 along
    the reduced dimension of ``self._output``; otherwise at ``self._output``
    itself.
    """
    self._lazyInit()
    dim = self._getPositiveDimension(input)
    buffers = (self._output, self._indices)
    torch.min(input, dim, out=buffers)
    result = self._output.select(dim, 0) if input.dim() > 1 else self._output
    self.output.set_(result)
    return self.output
def type(self, type, tensorCache=None):
    """Convert the module to ``type`` while keeping ``_indices`` an index tensor.

    ``self._indices`` is set to None while the parent class performs the
    conversion, then restored as ``'torch.cuda.LongTensor'`` when ``type`` is
    ``'torch.cuda.FloatTensor'`` and via ``.long()`` otherwise. A ``None``
    index buffer stays ``None``. Returns ``self``.
    """
    # torch.min expects a LongTensor as indices, whereas cutorch.max expects a CudaTensor.
    wants_cuda = type == 'torch.cuda.FloatTensor'

    # self._indices must be a LongTensor. Setting it to None temporarily avoids
    # unnecessary memory allocations.
    saved, self._indices = self._indices, None
    super(Min, self).type(type, tensorCache)

    if saved is not None:
        saved = saved.type('torch.cuda.LongTensor') if wants_cuda else saved.long()
    self._indices = saved
    return self
def test_min(self):
    """Run the shared selection check with ``torch.min`` and ``min``."""
    ops = (torch.min, min)
    self._testSelection(*ops)
def test_min_elementwise(self):
    """Run the shared elementwise-selection check with ``torch.min`` and ``min``."""
    ops = (torch.min, min)
    self._testCSelection(*ops)