def __init__(self, q_function, optimizer,
t_max, gamma, i_target, explorer, phi=lambda x: x,
average_q_decay=0.999, logger=getLogger(__name__),
batch_states=batch_states):
self.shared_q_function = q_function
self.target_q_function = copy.deepcopy(q_function)
self.q_function = copy.deepcopy(self.shared_q_function)
async.assert_params_not_shared(self.shared_q_function, self.q_function)
self.optimizer = optimizer
self.t_max = t_max
self.gamma = gamma
self.explorer = explorer
self.i_target = i_target
self.phi = phi
self.logger = logger
self.average_q_decay = average_q_decay
self.batch_states = batch_states
self.t_global = mp.Value('l', 0)
self.t = 0
self.t_start = 0
self.past_action_values = {}
self.past_states = {}
self.past_rewards = {}
self.average_q = 0
# (Web-page navigation residue, not part of the code: "Comment list" / "Table of contents".)