def __init__(self, maxlen, input_shape, action_size):
    """Allocate disk-backed buffers that each hold ``maxlen`` entries.

    Four ``np.memmap`` arrays (``screens``, ``actions``, ``rewards`` and
    ``is_terminal``) are created inside a fresh temporary directory, each
    with ``maxlen`` as its leading dimension.

    Args:
        maxlen: Number of entries every buffer can hold.
        input_shape: Per-entry shape of ``screens``; any sequence of ints
            (list or tuple).
        action_size: Length of each row of ``actions``.
    """
    self.maxlen = maxlen
    # NOTE: nothing in this method removes the temporary directory again.
    dirname = tempfile.mkdtemp()

    # Use memory maps so we won't have to worry about eating up lots of RAM.
    def get_path(name):
        return os.path.join(dirname, name)

    # tuple(...) lets callers pass input_shape as either a list or a tuple.
    screens_shape = (self.maxlen,) + tuple(input_shape)
    self.screens = np.memmap(get_path('screens'), dtype=np.float32,
                             mode='w+', shape=screens_shape)
    self.actions = np.memmap(get_path('actions'), dtype=np.float32,
                             mode='w+', shape=(self.maxlen, action_size))
    self.rewards = np.memmap(get_path('rewards'), dtype=np.float32,
                             mode='w+', shape=(self.maxlen,))
    # np.bool_ rather than np.bool: the bare alias was removed in NumPy 1.24.
    self.is_terminal = np.memmap(get_path('terminals'), dtype=np.bool_,
                                 mode='w+', shape=(self.maxlen,))
    # Presumably the next write index and a "buffer has wrapped" flag;
    # confirm against the methods that update them.
    self.position = 0
    self.full = False
# def _get_states(batch):
# s = list()
# for i in xrange(-3, 2):
# s.append(self.screens[batch+i])
# return np.vstack(s[:-1]), np.vstack(s[1:])
# (Web-page residue, not code: "Comment list" / "Table of contents".)