def build_model_graph(self):
    self.filter_tensors = {}
    self.bias_tensors = {}
    # lots of decisions to make here
    with tf.variable_scope(self.name) as self.scope:
        self.input_place_holder = tf.placeholder(
            tf.float32,
            shape=(None, self.params.window, self.params.ob_size * 4 + 2),
            name='input')
        # reshape to NHWC with a single channel; curr_dimension tracks the running
        # [batch, height, width, channels] shape as layers are stacked
        curr_dimension = [tf.shape(self.input_place_holder)[0],
                          self.params.window, self.params.ob_size * 4 + 2, 1]
        curr_layer = tf.reshape(self.input_place_holder, curr_dimension)
        for name, layer_params in sorted(self.layers.items()):
            print curr_dimension
            print curr_layer
            if layer_params['type'] == 'conv':
                n = 'conv_{}_filter_size_{}_stride_{}_num_{}'.format(
                    name, layer_params['size'], layer_params['stride'], layer_params['num'])
                # filter shape is [height, width, in_channels, out_channels]
                s = [layer_params['size'], layer_params['size'], curr_dimension[3], layer_params['num']]
                strides = [1, layer_params['stride'], layer_params['stride'], 1]
                self.filter_tensors[name] = tf.Variable(tf.truncated_normal(s, stddev=0.0001), name=n)
                self.bias_tensors[name] = tf.Variable(
                    tf.truncated_normal(shape=[layer_params['num']], stddev=0.1), name=n + '_bias')
                conv_output = tf.nn.conv2d(curr_layer, self.filter_tensors[name], strides, "VALID")
                conv_bias = tf.nn.bias_add(conv_output, self.bias_tensors[name])
                curr_layer = tf.nn.relu(conv_bias)
                curr_dimension = compute_output_size(
                    curr_dimension[0], curr_dimension[1], curr_dimension[2],
                    layer_params['size'], layer_params['stride'], 0, layer_params['num'])
            if layer_params['type'] == 'pool':
                if layer_params['pool_type'] == 'max':
                    s = [1, layer_params['size'], layer_params['size'], 1]
                    stride = [1, layer_params['stride'], layer_params['stride'], 1]
                    curr_layer = tf.nn.max_pool(curr_layer, s, stride, 'VALID')
                    curr_dimension = compute_pool_size(
                        curr_dimension[0], curr_dimension[1], curr_dimension[2],
                        layer_params['size'], layer_params['stride'], curr_dimension[3])
                if layer_params['pool_type'] == 'avg':
                    s = [1, layer_params['size'], layer_params['size'], 1]
                    stride = [1, layer_params['stride'], layer_params['stride'], 1]
                    curr_layer = tf.nn.avg_pool(curr_layer, s, stride, 'VALID')
                    curr_dimension = compute_pool_size(
                        curr_dimension[0], curr_dimension[1], curr_dimension[2],
                        layer_params['size'], layer_params['stride'], curr_dimension[3])
            if layer_params['type'] == 'fc':
                print 'hi'
                print curr_dimension
                print curr_layer
                if not self.advantage:
                    # single head: a convolution whose kernel covers the whole remaining
                    # feature map (equivalent to a fully connected layer), one output per action
                    final_s = [curr_dimension[1], curr_dimension[2], curr_dimension[3], self.params.actions]
                    strides = [1, 1, 1, 1]
                    projection = tf.Variable(tf.truncated_normal(final_s, stddev=0.1), name="final_projection")
                    bias = tf.Variable(tf.truncated_normal([self.params.actions], stddev=0.1), name="final_projection_bias")
                    self.outs = tf.nn.conv2d(curr_layer, projection, strides, 'VALID') + bias
                    self.predictions = tf.squeeze(self.outs, squeeze_dims=[1, 2])
                else:
                    # dueling head: split the channels into advantage and value streams,
                    # then recombine as Q = V + (A - mean(A))
                    self.advantage_stream, self.value_stream = tf.split(curr_layer, 2, 3)
                    final_s_a = [curr_dimension[1], curr_dimension[2], curr_dimension[3] / 2, self.params.actions]
                    final_s_v = [curr_dimension[1], curr_dimension[2], curr_dimension[3] / 2, 1]
                    strides = [1, 1, 1, 1]
                    self.projection_a = tf.Variable(tf.truncated_normal(final_s_a, stddev=0.01), name="final_projection_a")
                    self.projection_v = tf.Variable(tf.truncated_normal(final_s_v, stddev=0.01), name="final_projection_v")
                    self.A = tf.squeeze(tf.nn.conv2d(self.advantage_stream, self.projection_a, strides, 'VALID'),
                                        squeeze_dims=[1, 2])
                    self.V = tf.squeeze(tf.nn.conv2d(self.value_stream, self.projection_v, strides, 'VALID'),
                                        squeeze_dims=[1, 2])
                    self.predictions = self.V + tf.subtract(self.A, tf.reduce_mean(self.A, axis=1, keep_dims=True))
                self.min_score = tf.reduce_min(self.predictions, axis=[1])
                self.min_action = tf.argmin(tf.squeeze(self.predictions), axis=0, name="arg_min")
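
# compute_output_size and compute_pool_size are helpers defined elsewhere in this
# file. Judging from how curr_dimension is indexed above, they presumably return the
# post-layer [batch, height, width, channels] list for VALID padding. A minimal sketch
# of that formula, under that assumption (the name below is hypothetical, not the
# repository's actual helper):
def _valid_output_size(batch, height, width, filter_size, stride, padding, num_channels):
    # standard output-size formula: out = (in - filter + 2 * padding) // stride + 1
    out_height = (height - filter_size + 2 * padding) // stride + 1
    out_width = (width - filter_size + 2 * padding) // stride + 1
    return [batch, out_height, out_width, num_channels]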
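
# A minimal usage sketch (assumptions: `model` is an instance of the surrounding
# class with params and layers already configured, and build_model_graph() has
# been called). It feeds one window of input and reads back the Q-value estimates
# and the arg-min action defined above.
import numpy as np

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    window = np.zeros((1, model.params.window, model.params.ob_size * 4 + 2), dtype=np.float32)
    q_values, action = sess.run([model.predictions, model.min_action],
                                feed_dict={model.input_place_holder: window})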