def learn_actor(self, s, x_ma, epoch):
    """Run one batch update of the actor network and log gradient stats.

    Args:
        s: batch of this agent's observations, fed to placeholder ``S``.
        x_ma: joint (multi-agent) state/action batch, fed to ``X_MA``.
        epoch: global step used as the x-axis for the TensorBoard summary.

    Side effects: advances ``self.t_replace_counter`` and, every
    ``self.t_replace_iter`` calls, hard-copies evaluation-network params
    into the target network. Writes a summary via the module-level
    ``writer``.
    """
    # One training step; train_ops is assumed to return (train_op, policy grads).
    _, policy_grads = self.sess.run(self.train_ops, feed_dict={S: s, X_MA: x_ma})

    # Log the mean policy-gradient magnitude for this agent.
    # NOTE: the tag spelling 'police_grads' is kept as-is so existing
    # TensorBoard runs remain comparable.
    summary = tf.Summary()
    summary.value.add(tag='info/police_grads{}'.format(self.agent_id),
                      simple_value=np.mean([np.mean(g) for g in policy_grads]))
    writer.add_summary(summary, epoch)
    writer.flush()

    # Hard replacement of target params (cheaper than the soft update
    # target = (1 - tau) * target + tau * eval on every step).
    if self.t_replace_counter % self.t_replace_iter == 0:
        # Build the assign ops exactly once and cache them: calling
        # tf.assign here on every replacement adds new nodes to the graph
        # each time, growing the graph without bound in TF1.
        if not hasattr(self, '_hard_replace_ops'):
            self._hard_replace_ops = [tf.assign(t, e)
                                      for t, e in zip(self.t_params, self.e_params)]
        self.sess.run(self._hard_replace_ops)
    self.t_replace_counter += 1
# Stray blog-page navigation text left over from the source page:
# "评论列表" (comment list) / "文章目录" (article table of contents) — not code.