def __init__(self):
print "Initializing DQN..."
self.exploration_rate = config.rl_initial_exploration
self.fcl_eliminated = True if len(config.q_fc_hidden_units) == 0 else False
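    # The agent acts epsilon-greedily: with probability `exploration_rate` it
    # takes a random action, otherwise the argmax over Q-values. A common
    # schedule (a sketch only, assuming the linear annealing from the DQN
    # paper; `config.rl_final_exploration` and
    # `config.rl_final_exploration_step` are assumed config names):
    #
    #   eps = max(config.rl_final_exploration,
    #             config.rl_initial_exploration
    #             - (config.rl_initial_exploration - config.rl_final_exploration)
    #             * step / config.rl_final_exploration_step)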
    # Q Network
    conv, fc = build_q_network(config)
    self.conv = conv
    if not self.fcl_eliminated:
        self.fc = fc
    self.load()
    self.update_target()
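    # `update_target()` is assumed to copy the online network's weights into a
    # separate target network. In DQN the target network is held fixed between
    # periodic syncs, so the bootstrap target
    #   y = r + gamma * max_a' Q_target(s', a')
    # does not chase the parameters it is training.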
    # Optimizer
    ## RMSProp, Adam, AdaGrad, AdaDelta, ...
    ## See http://docs.chainer.org/en/stable/reference/optimizers.html
    self.optimizer_conv = optimizers.Adam(alpha=config.rl_learning_rate, beta1=config.rl_gradient_momentum)
    self.optimizer_conv.setup(self.conv)
    if not self.fcl_eliminated:
        self.optimizer_fc = optimizers.Adam(alpha=config.rl_learning_rate, beta1=config.rl_gradient_momentum)
        self.optimizer_fc.setup(self.fc)
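    ## The DQN paper itself used RMSProp with gradient momentum; Chainer's
    ## closest built-in is RMSpropGraves. A hedged alternative (sketch only,
    ## swap in for the Adam setup above if you want to match the paper):
    # self.optimizer_conv = optimizers.RMSpropGraves(
    #     lr=config.rl_learning_rate, momentum=config.rl_gradient_momentum)
    # self.optimizer_conv.setup(self.conv)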
    # Replay Memory
    ## (state, action, reward, next_state, episode_ends)
    shape_state = (config.rl_replay_memory_size,
                   config.rl_agent_history_length * config.ale_screen_channels,
                   config.ale_scaled_screen_size[1],
                   config.ale_scaled_screen_size[0])
    shape_action = (config.rl_replay_memory_size,)
    self.replay_memory = [
        np.zeros(shape_state, dtype=np.float32),   # state
        np.zeros(shape_action, dtype=np.uint8),    # action
        np.zeros(shape_action, dtype=np.int8),     # reward
        np.zeros(shape_state, dtype=np.float32),   # next_state
        np.zeros(shape_action, dtype=bool)         # episode_ends (np.bool was removed in NumPy >= 1.24)
    ]
    self.total_replay_memory = 0
    self.no_op_count = 0
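# A minimal sketch (not part of the original class) of how one transition
# could be written into the preallocated arrays above, treating them as a
# ring buffer that overwrites the oldest entry once full. The method name
# and signature are assumptions for illustration:
def store_transition(self, state, action, reward, next_state, episode_ends):
    index = self.total_replay_memory % config.rl_replay_memory_size
    self.replay_memory[0][index] = state
    self.replay_memory[1][index] = action
    # int8 reward storage suggests rewards are clipped, as in the DQN paper
    self.replay_memory[2][index] = np.clip(reward, -1, 1)
    self.replay_memory[3][index] = next_state
    self.replay_memory[4][index] = episode_ends
    self.total_replay_memory += 1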