# Module-level imports this method depends on; `dataset` is the
# project-local module that provides the DataSet replay buffer.
import copy

import numpy as np
import chainer
from chainer import optimizers

import dataset


def __init__(self, state_shape, action_num, image_num_per_state,
             model,
             gamma=0.99,              # discount factor
             replay_batch_size=32,
             replay_memory_size=5 * 10**4,
             target_model_update_freq=1,
             max_step=50,
             lr=0.00025,
             clipping=False           # if True, clip rewards to their sign, ignoring magnitude
             ):
print("initializing DQN...")
self.action_num = action_num
self.image_num_per_state = image_num_per_state
self.gamma = gamma
self.replay_batch_size = replay_batch_size
self.replay_memory_size = replay_memory_size
self.target_model_update_freq = target_model_update_freq
self.max_step = max_step
self.clipping = clipping
print("Initializing Model...")
self.model = model
self.model_target = copy.deepcopy(self.model)
print("Initializing Optimizer")
self.optimizer = optimizers.RMSpropGraves(lr=lr, alpha=0.95, momentum=0.0, eps=0.01)
self.optimizer.setup(self.model)
self.optimizer.add_hook(chainer.optimizer.GradientClipping(20))
print("Initializing Replay Buffer...")
self.dataset = dataset.DataSet(
max_size=replay_memory_size, max_step=max_step, frame_shape=state_shape, frame_dtype=np.uint8)
self.xp = model.xp
self.state_shape = state_shape
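
# --- Usage sketch (illustrative; names outside this file are assumptions) ---
# Assuming this __init__ belongs to a class named `DQN` (suggested by the
# log line above) and that the project defines a Chainer Q-network,
# construction might look like:
#
#     q_net = QNetwork(image_num_per_state=4, action_num=6)  # hypothetical model class
#     agent = DQN(state_shape=(84, 84), action_num=6,
#                 image_num_per_state=4, model=q_net,
#                 gamma=0.99, replay_batch_size=32, clipping=True)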