def get_training_data_page(self, num_samples):
    """
    Sample transitions from replay memory and pack them into a
    TrainingDataPage.

    The samples come from ``self.sample_memories``; any shuffling or
    transformation of the transitions happens inside that helper.

    :param num_samples: Number of transitions to sample from replay memory.
    :return: A TrainingDataPage built from float32 arrays of the sampled
        states, actions, rewards, next states, next actions and possible
        next actions, followed by two ``None`` placeholders and a boolean
        array that is the element-wise negation of the sampled terminal
        flags.
    """
    (
        states,
        actions,
        rewards,
        next_states,
        next_actions,
        terminals,
        possible_next_actions,
    ) = self.sample_memories(num_samples)

    return TrainingDataPage(
        np.array(states, dtype=np.float32),
        np.array(actions, dtype=np.float32),
        np.array(rewards, dtype=np.float32),
        np.array(next_states, dtype=np.float32),
        np.array(next_actions, dtype=np.float32),
        np.array(possible_next_actions, dtype=np.float32),
        # Two positional slots are intentionally left unset.
        None,
        None,
        # Builtin ``bool`` instead of ``np.bool``: that alias was deprecated
        # in NumPy 1.20 and removed in 1.24, where it raises AttributeError.
        # The resulting dtype is identical.
        np.logical_not(terminals, dtype=bool),
    )
评论列表
文章目录