def coordinate_Newton(losses, indice, grad, hess, batch_size, mt_arr, vt_arr, real_modifier, up, down, lr, adam_epoch, beta1, beta2, proj):
    """Apply one coordinate-wise Newton step from finite-difference losses.

    ``losses[0]`` is used as the loss at the current point. For every
    coordinate ``i`` in the batch, ``losses[2*i + 1]`` and
    ``losses[2*i + 2]`` are differenced against each other (divided by
    ``0.0002``) for the gradient, and combined with ``losses[0]`` (divided by
    ``0.0001 ** 2``) for the second derivative. This presumably corresponds
    to losses evaluated at +0.0001 / -0.0001 along each coordinate; the
    perturbation itself is applied by the caller and is not visible here.

    Args:
        losses: Sequence of at least ``2 * batch_size + 1`` loss values laid
            out as described above.
        indice: Index (typically an integer array) into the flattened
            ``real_modifier`` selecting the coordinates to update.
        grad: NumPy array; its first ``batch_size`` entries are overwritten
            with the gradient estimates.
        hess: NumPy array; its first ``batch_size`` entries are overwritten
            with the second-derivative estimates, then the whole array is
            clamped in place (negative -> 1.0, below 0.1 -> 0.1).
        batch_size: Number of coordinates evaluated in ``losses``.
        mt_arr, vt_arr, adam_epoch, beta1, beta2: Unused by this solver;
            presumably kept so it shares a call signature with other
            coordinate solvers -- confirm against the caller.
        real_modifier: Array updated in place through a flattened view.
        up: Upper bounds, indexed by ``indice``; only read when ``proj``.
        down: Lower bounds, indexed by ``indice``; only read when ``proj``.
        lr: Step size multiplying the Newton direction ``grad / hess``.
        proj: If truthy, clip the updated values to ``[down, up]``.

    Returns:
        None. ``grad``, ``hess`` and ``real_modifier`` are modified in place.

    Raises:
        IndexError: If ``losses`` has fewer than ``2 * batch_size + 1``
            entries (for ``batch_size > 0``).
    """
    # Work in float64: the expressions below subtract nearly equal numbers
    # and divide by a tiny step, which amplifies any rounding error.
    values = np.asarray(losses, dtype=np.float64)
    base_loss = values[0]

    # Integer-array indexing (rather than slicing) keeps the IndexError the
    # element-by-element form would raise when ``losses`` is too short.
    coords = np.arange(batch_size)
    first = values[2 * coords + 1]
    second = values[2 * coords + 2]
    grad[:batch_size] = (first - second) / 0.0002
    hess[:batch_size] = (first - 2 * base_loss + second) / (0.0001 * 0.0001)

    # Negative curvature carries no usable second-order information; using
    # 1.0 turns the update for that coordinate into plain gradient descent.
    hess[hess < 0] = 1.0
    # Very small curvature may be numerical noise; floor it so the step
    # grad / hess cannot blow up.
    hess[hess < 0.1] = 0.1

    # NOTE: reshape(-1) returns a view only when real_modifier's memory
    # layout allows it; otherwise the assignment below lands on a copy.
    flat_modifier = real_modifier.reshape(-1)
    updated = flat_modifier[indice]
    updated -= lr * grad / hess
    if proj:
        # Project back into the box [down, up] for the selected coordinates.
        updated = np.maximum(np.minimum(updated, up[indice]), down[indice])
    flat_modifier[indice] = updated
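# NOTE: the following are web-page navigation labels left over from scraping, not code.
# 评论列表 (comment list)
# 文章目录 (article table of contents)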