def deep_q_network():
""" Architecture according to:
http://www.nature.com/nature/journal/v518/n7540/full/nature14236.html
"""
@tt.model(tracker=tf.train.ExponentialMovingAverage(1 - .0005), # TODO: replace with original weight freeze
optimizer=tf.train.RMSPropOptimizer(.00025, .95, .95, .01))
def q_network(x):
x /= 255
x = layers.conv2d(x, 32, 8, 4)
x = layers.conv2d(x, 64, 4, 2)
x = layers.conv2d(x, 64, 3, 1)
x = layers.flatten(x)
x = layers.fully_connected(x, 512)
x = layers.fully_connected(x, env.action_space.n, activation_fn=None)
x = tf.identity(x, name='Q')
return x
return q_network
评论列表
文章目录