def __init__(self,
             env,
             policy,
             episode_len=100,
             discount=False,
             optimizer='sgd'):
    """Set up the policy-gradient training graph (currently disabled).

    The very first statement raises ``NotImplementedError``, so none of
    the graph-building code below runs yet; it is kept as the
    work-in-progress implementation.

    Args:
        env: Environment object; only stored on the instance here.
        policy: Object exposing ``model(states)``. Its result is treated
            as per-action probabilities (presumably a softmax output --
            confirm against the policy class).
        episode_len: Stored as ``self.episode_len``.
        discount: Stored as ``self.discount``.
        optimizer: Not read anywhere in this method yet; Adam with a
            learning rate of 0.005 is hard-coded below.

    Raises:
        NotImplementedError: Always.
    """
    raise NotImplementedError

    # ------------------------------------------------------------------
    # Everything below is unreachable until the raise above is removed.
    # ------------------------------------------------------------------
    self.env, self.policy = env, policy
    self.episode_len, self.discount = episode_len, discount

    # Graph inputs: rows of 4 state features and 2 action entries
    # (presumably one-hot actions -- confirm with the caller). The
    # rewards placeholder leaves its shape unspecified.
    self.states = tf.placeholder(dtype=tf.float32, shape=(None, 4))
    self.actions = tf.placeholder(dtype=tf.float32, shape=(None, 2))
    self.rewards = tf.placeholder(dtype=tf.float32, shape=None)

    # Mask the policy output with the actions and sum along axis 1,
    # leaving a single probability per row, then take its log.
    self.probs = policy.model(self.states)
    self.action_probs = tf.mul(self.probs, self.actions)
    self.reduced_action_probs = tf.reduce_sum(
        self.action_probs,
        reduction_indices=[1],
    )
    self.logprobs = tf.log(self.reduced_action_probs)

    # Loss: negated sum of reward-weighted log-probabilities.
    self.eligibility = self.logprobs * self.rewards
    self.L = -tf.reduce_sum(self.eligibility)

    # Intended as the Fisher matrix.
    # NOTE(review): logprobs is rank 1 after the reduce_sum, so the
    # transpose presumably has no effect and this is an element-wise
    # square rather than an outer product -- confirm before relying on it.
    self.F = tf.mul(self.logprobs, tf.transpose(self.logprobs))

    # TODO: pick the optimizer from the `optimizer` argument.
    # Open question from the author: split this into compute_gradients /
    # apply_gradients so a custom function can be applied to the gradients.
    adam = tf.train.AdamOptimizer(0.005)
    self.opt = adam.minimize(self.L)

    self.sess = tf.Session()
    init_op = tf.initialize_all_variables()
    self.sess.run(init_op)
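# (web-page residue, not part of the program) Comment list
# (web-page residue, not part of the program) Table of contents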