# requires `import numpy as np` and `from chainer import optimizers`, plus the
# Q network class (assumed to appear earlier in the post)
def __init__(self,
agent,
memory_size=10**4,
replay_size=32,
gamma=0.99,
initial_exploration=10**4,
target_update_freq=10**4,
learning_rate=0.00025,
epsilon_decay=1e-6,
minimum_epsilon=0.1):
self.agent = agent
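# separate target network with the same architecture as the online Q network;
# its parameters are synchronized every target_update_freq steps (the sync
# itself happens in the training loop, which is not shown here)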
self.target = Q(self.agent.q.n_history, self.agent.q.n_action, on_gpu=self.agent.q.on_gpu)
self.memory_size = memory_size
self.replay_size = replay_size
self.gamma = gamma
self.initial_exploration = initial_exploration
self.target_update_freq = target_update_freq
self.learning_rate = learning_rate
self.epsilon_decay = epsilon_decay
self.minimum_epsilon = minimum_epsilon
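# global step counter: schedules the exploration phase, replay training, and target syncs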
self._step = 0
# prepare memory for replay
n_hist = self.agent.q.n_history
size = self.agent.q.SIZE
self.memory = [
    np.zeros((memory_size, n_hist, size, size), dtype=np.float32),  # state
    np.zeros(memory_size, dtype=np.uint8),                          # action
    np.zeros((memory_size, 1), dtype=np.float32),                   # reward
    np.zeros((memory_size, n_hist, size, size), dtype=np.float32),  # next_state
    np.zeros((memory_size, 1), dtype=bool)                          # episode_end
]
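# the five arrays above form one replay record per index; presumably entries are
# overwritten cyclically (index self._step % memory_size) once memory_size
# experiences have been stored, though that update step is not shown here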
self.memory_text = [
"state", "action", "reward", "next_state", "episode_end"
]
# prepare optimizer
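# RMSpropGraves with the Nature DQN hyperparameters (Mnih et al., 2015):
# lr=0.00025, alpha=0.95, momentum=0.95, eps=0.01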
self.optimizer = optimizers.RMSpropGraves(lr=learning_rate, alpha=0.95, momentum=0.95, eps=0.01)
self.optimizer.setup(self.agent.q)
# running statistics for logging (training loss and Q-value)
self._loss = 0
self._qv = 0
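# A minimal sketch of the periodic target-network sync that target_update_freq
# implies. The method name `update_target` is an assumption (the original
# training loop is not shown here); Link.copyparams is standard Chainer and
# copies the online network's parameters into the target network.
def update_target(self):
    if self._step % self.target_update_freq == 0:
        self.target.copyparams(self.agent.q)

# Epsilon itself is annealed linearly: assuming it starts at 1.0, subtracting
# epsilon_decay (1e-6) each step reaches minimum_epsilon (0.1) after 900,000
# steps, consistent with the Nature DQN annealing schedule.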