def train(self, observations, actions, advantages):
    """Take one training step and return the resulting loss.

    Runs ``self.train_op`` and ``self.loss`` together in the default
    TensorFlow session, feeding the three arguments for
    ``self.observations``, ``self.actions`` and ``self.advantages``.
    When the module-level ``VERBOSE_DEBUG`` flag is truthy, the inputs
    are printed before the step is run.

    Args:
        observations: Values fed for ``self.observations``. In debug mode
            they are additionally passed through ``np.stack`` for display,
            so they must be stackable in that case.
        actions: Values fed for ``self.actions``.
        advantages: Values fed for ``self.advantages``.

    Returns:
        The fetched loss converted to a Python ``float``.

    Note:
        A default session must be active when this is called; the result
        of ``tf.get_default_session()`` is used without a ``None`` check.
    """
    if VERBOSE_DEBUG:
        # Single-argument print(...) with %-formatting emits the same text
        # on Python 2 and Python 3, without needing a __future__ import.
        print("TRAIN")
        print("observations %s" % (np.stack(observations),))
        print("actions %s" % (actions,))
        print("advantages %s" % (advantages,))

    # The training step is identical whether or not debugging is enabled,
    # so it is issued once here rather than duplicated in each branch.
    feed_dict = {
        self.observations: observations,
        self.actions: actions,
        self.advantages: advantages,
    }
    _, loss = tf.get_default_session().run(
        [self.train_op, self.loss], feed_dict=feed_dict)
    return float(loss)
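# 评论列表 (comment list) -- web-page navigation text captured with the snippet; not code
# 文章目录 (article table of contents) -- web-page navigation text captured with the snippet; not code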