def build_model_graph(self):
    with tf.variable_scope(self.name) as self.scope:
        # A window of observations, ob_size * 4 + 2 features per time step.
        self.input_place_holder = tf.placeholder(tf.float32, shape=(None, self.params.window, self.params.ob_size * 4 + 2), name='input')

        # Stacked LSTM cells unrolled over the observation window.
        self.forward_cell_layers = tf.contrib.rnn.MultiRNNCell([tf.contrib.rnn.LSTMCell(self.params.hidden_size) for _ in range(self.params.hidden_depth)])
        self.rnn_output, self.final_rnn_state = tf.nn.dynamic_rnn(self.forward_cell_layers, self.input_place_holder, dtype=tf.float32)

        # Keep only the final time step's hidden state: (batch, hidden_size).
        self.outs = tf.squeeze(tf.slice(self.rnn_output, [0, self.params.window - 1, 0], [-1, 1, self.params.hidden_size]), axis=1)

        if not self.advantage:
            # Standard Q-head: one linear layer mapping the hidden state to a Q-value per action.
            self.U = tf.get_variable('U', shape=[self.params.hidden_size, self.params.actions])
            self.b_2 = tf.get_variable('b2', shape=[self.params.actions])
            self.predictions = tf.cast(tf.matmul(self.outs, self.U) + self.b_2, 'float32')
        else:
            # Dueling head: split the hidden state into advantage and value streams (hidden_size must be even).
            self.advantage_stream, self.value_stream = tf.split(self.outs, 2, 1)
            self.U_a = tf.get_variable('U_a', shape=[self.params.hidden_size // 2, self.params.actions])
            self.U_v = tf.get_variable('U_v', shape=[self.params.hidden_size // 2, 1])
            self.A = tf.cast(tf.matmul(self.advantage_stream, self.U_a), 'float32')
            self.V = tf.cast(tf.matmul(self.value_stream, self.U_v), 'float32')
            # Q(s, a) = V(s) + (A(s, a) - mean_a A(s, a))
            self.predictions = self.V + tf.subtract(self.A, tf.reduce_mean(self.A, axis=1, keep_dims=True))

        # Lowest predicted Q-value per sample and the corresponding action
        # (the argmin assumes a single-sample batch at inference time).
        self.min_score = tf.reduce_min(self.predictions, axis=1)
        self.min_action = tf.argmin(tf.squeeze(self.predictions), axis=0, name="arg_min")
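
For reference, the dueling branch above combines the two streams as Q(s, a) = V(s) + (A(s, a) - mean_a A(s, a)), and the agent then acts on the minimum predicted Q-value. The following is a minimal NumPy sketch of that aggregation step; it is standalone, not part of q_learners.py, and the array names and sizes are illustrative only.

import numpy as np

# Illustrative outputs of the two linear heads for a small batch:
# A holds one advantage per action, V a single state value per sample.
batch, actions = 2, 5
A = np.random.randn(batch, actions).astype(np.float32)
V = np.random.randn(batch, 1).astype(np.float32)

# Dueling aggregation: centre the advantages per sample so V and A stay identifiable,
# mirroring tf.reduce_mean(self.A, axis=1, keep_dims=True) in the graph above.
Q = V + (A - A.mean(axis=1, keepdims=True))

# The graph then takes the minimum Q-value and its index (min_score / min_action).
min_score = Q.min(axis=1)
min_action = Q.argmin(axis=1)
print(Q.shape, min_score, min_action)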