import random

# Checkpoint interval; in the original file this constant is defined at module
# level (the value here is assumed). load_saved_params and save_params are
# likewise assumed to be defined alongside this function.
SAVE_PARAMS_EVERY = 5000


def sgd(f, x0, step, iterations, postprocessing=None, useSaved=False,
        PRINT_EVERY=10):
    """Stochastic gradient descent.

    f               -- function mapping a parameter vector x to (cost, gradient)
    x0              -- initial parameter vector
    step            -- learning rate, halved every ANNEAL_EVERY iterations
    iterations      -- total number of update steps
    postprocessing  -- optional hook applied to x after every update
                       (e.g. renormalizing word vectors); defaults to identity
    useSaved        -- resume from the latest checkpoint written by save_params
    PRINT_EVERY     -- how often to report the smoothed cost
    """
    ANNEAL_EVERY = 20000

    if useSaved:
        start_iter, oldx, state = load_saved_params()
        if start_iter > 0:
            x0 = oldx
            # Re-apply every learning-rate halving that happened before
            # the checkpoint was written.
            step *= 0.5 ** (start_iter // ANNEAL_EVERY)
        if state:
            random.setstate(state)
    else:
        start_iter = 0

    x = x0
    if not postprocessing:
        postprocessing = lambda v: v

    expcost = None
    for it in range(start_iter + 1, iterations + 1):
        cost, grad = f(x)
        x -= step * grad          # vanilla gradient step
        x = postprocessing(x)

        if it % PRINT_EVERY == 0:
            # Exponential moving average of the cost for smoother logging.
            if expcost is None:
                expcost = cost
            else:
                expcost = 0.95 * expcost + 0.05 * cost
            print("iter %d: %f" % (it, expcost))

        if it % SAVE_PARAMS_EVERY == 0 and useSaved:
            save_params(it, x)

        if it % ANNEAL_EVERY == 0:
            step *= 0.5

    return x
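

# A minimal usage sketch, not from the original: sanity-check sgd on a simple
# quadratic. numpy, the objective `quad`, and the hyperparameters below are
# all illustrative assumptions.
import numpy as np


def quad(v):
    # f(v) = sum(v**2) with gradient 2v; the minimum is the zero vector.
    return np.sum(v ** 2), 2 * v


if __name__ == "__main__":
    x_opt = sgd(quad, np.array([1.0, 2.0, -3.0]), step=0.1,
                iterations=1000, PRINT_EVERY=200)
    print(x_opt)  # should be close to [0. 0. 0.]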