def __init__(self,
             graph,
             exploration_steps,
             total_steps,
             gamma,
             a3c_update_interval,
             action_sampler):
    """Store the agent configuration and set up the global step counter.

    According to the original author's note, ``graph`` is expected to
    contain placeholders called "states", "actions" and "returns", and
    operations called "loss_op", "train_op", "probs" and "value". None of
    those are looked up in this constructor; the only thing read from the
    graph here is its "global_step" collection, which must hold at least
    one entry (otherwise the ``[0]`` index raises ``IndexError``).

    Args:
        graph: Object exposing ``get_collection``; kept on ``self.graph``.
        exploration_steps: Stored unchanged on ``self.exploration_steps``.
        total_steps: Stored unchanged on ``self.total_steps``.
        gamma: Stored unchanged on ``self.gamma``
            (presumably the discount factor -- confirm against callers).
        a3c_update_interval: Stored unchanged on
            ``self.a3c_update_interval``.
        action_sampler: Stored unchanged on ``self.action_sampler``.
    """
    # Plain configuration values: kept verbatim for use by other methods.
    self.graph = graph
    self.gamma = gamma
    self.a3c_update_interval = a3c_update_interval
    self.action_sampler = action_sampler

    # The first element of the "global_step" collection is the counter
    # exposed as ``self.T``.
    step_counter = graph.get_collection("global_step")[0]
    self.T = step_counter

    self.exploration_steps = exploration_steps
    self.total_steps = total_steps

    # Op that adds 1 to the counter each time it is run.
    self.incr_T = tf.assign_add(step_counter, 1)
评论列表
文章目录