def step(self, action, mode):
    """Advance the environment by one step and return the transition.

    Args:
        action: The action forwarded unchanged to ``self._step``.
        mode: ``'tensorflow'`` wraps ``self._step`` in a ``tf.py_func`` op
            and returns tensors; any other value calls ``self._step``
            directly and returns whatever it produces.

    Returns:
        A 6-tuple ``(state, reward, done, 0., qvel, qpos)``. The fourth
        element is always the constant ``0.``. ``qvel`` and ``qpos`` are
        only produced when ``self.random_initialization`` is truthy;
        otherwise both are empty lists.
    """
    # Defaults used when the environment does not report qvel / qpos.
    # NOTE(review): presumably simulator joint velocities / positions --
    # confirm against self._step.
    qvel, qpos = [], []
    if mode == 'tensorflow':
        if self.random_initialization:
            # Bug fix: the fourth output used to be bound to a misspelled
            # name (`qval`), so the velocity tensor was dropped and the
            # empty-list default was returned instead.
            state, reward, done, qvel, qpos = tf.py_func(
                self._step, inp=[action],
                Tout=[tf.float32, tf.float32, tf.bool, tf.float32, tf.float32],
                name='env_step_func')
        else:
            state, reward, done = tf.py_func(
                self._step, inp=[action],
                Tout=[tf.float32, tf.float32, tf.bool],
                name='env_step_func')
        # Outputs of py_func carry no static shape, so pin the ones used
        # downstream: a flat state vector and a scalar done flag.
        state = tf.reshape(state, shape=(self.state_size,))
        done.set_shape(())
    else:
        if self.random_initialization:
            state, reward, done, qvel, qpos = self._step(action)
        else:
            state, reward, done = self._step(action)
    return state, reward, done, 0., qvel, qpos
# Scraped web-page residue, not part of the code: "评论列表" (comment list), "文章目录" (article table of contents).