def __init__(self, action_space, screen=(84, 84), n_step=8, discount=0.99):
    """Set up the agent: value network, compiled optimizer, and n-step buffers.

    Parameters
    ----------
    action_space : gym-style discrete action space (only ``.n`` is read here)
    screen : (height, width) of the input frames
    n_step : horizon of the n-step return buffers
    discount : reward discount factor
    """
    # Lazy import so keras is only required once an agent is constructed.
    from keras.optimizers import RMSprop

    # --- observation geometry: stacked past frames over the screen size ---
    self.screen = screen
    self.input_depth = 1
    self.past_range = 3
    self.observation_shape = (self.input_depth * self.past_range,) + self.screen

    # --- action-value network, gradient norms clipped for stability ---
    self.action_value = build_network(self.observation_shape, action_space.n)
    self.action_value.compile(optimizer=RMSprop(clipnorm=1.), loss='mse')
    self.action_space = action_space

    # Current frame stack and the previous one, both zero-initialized.
    self.observations = np.zeros(self.observation_shape)
    self.last_observations = np.zeros(self.observation_shape)

    # --- fixed-length histories used to compute n-step returns ---
    self.n_step_observations = deque(maxlen=n_step)
    self.n_step_actions = deque(maxlen=n_step)
    self.n_step_rewards = deque(maxlen=n_step)
    self.n_step = n_step
    self.discount = discount
    self.counter = 0