import numpy as np

def adagrad():
    # features, train, t, threshold, and f (the objective) come from the
    # surrounding training code; theta is the weight vector being learned.
    theta = np.ones(features.num_features) * -0.1
    theta[0] = 1.0
    master_stepsize = 1e-1   # for example
    fudge_factor = 1e-6      # for numerical stability
    historical_grad = 0
    for iteration in xrange(10):
        print "iteration", iteration
        for i, (x, y) in enumerate(train):
            print i
            theta_g = np.zeros_like(theta)
            t.grad_features(x, y, i, theta, theta_g, features, threshold)
            # accumulate squared gradients and scale each coordinate's step
            # by the inverse root of its accumulated history
            historical_grad += theta_g * theta_g
            adjusted_grad = theta_g / (fudge_factor + np.sqrt(historical_grad))
            theta -= master_stepsize * adjusted_grad
        print f(theta)
    return theta
#cProfile.runctx("adagrad()", globals(), locals(), '.prof')
#s = pstats.Stats('.prof')
#s.strip_dirs().sort_stats('time').print_stats(30)
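# Below is a minimal self-contained sketch of the same per-coordinate AdaGrad
# update, applied to a toy quadratic objective 0.5 * ||theta - target||^2 so it
# runs without the external features/train/grad_features objects used above.
# The names toy_adagrad and toy_target are illustrative, not from the original.
def toy_adagrad(steps=100, master_stepsize=1e-1, fudge_factor=1e-6):
    toy_target = np.array([1.0, -2.0, 3.0])
    theta = np.zeros_like(toy_target)
    historical_grad = np.zeros_like(toy_target)
    for _ in xrange(steps):
        grad = theta - toy_target        # gradient of the toy quadratic
        historical_grad += grad * grad   # accumulate squared gradients
        theta -= master_stepsize * grad / (fudge_factor + np.sqrt(historical_grad))
    return theta  # moves toward toy_target as steps grows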