# Common imports assumed by the snippets below (TensorFlow 1.x API).
import numpy as np
import tensorflow as tf
import tensorflow.contrib.layers as layers


def _cnn_to_mlp(convs, hiddens, dueling, inpt, num_actions, scope, reuse=False):
    """Builds a Q-network: a conv stack followed by an MLP, optionally with a dueling head."""
    with tf.variable_scope(scope, reuse=reuse):
        out = inpt
        with tf.variable_scope("convnet"):
            for num_outputs, kernel_size, stride in convs:
                out = layers.convolution2d(out,
                                           num_outputs=num_outputs,
                                           kernel_size=kernel_size,
                                           stride=stride,
                                           activation_fn=tf.nn.relu)
        out = layers.flatten(out)
        with tf.variable_scope("action_value"):
            action_out = out
            for hidden in hiddens:
                action_out = layers.fully_connected(action_out, num_outputs=hidden, activation_fn=tf.nn.relu)
            action_scores = layers.fully_connected(action_out, num_outputs=num_actions, activation_fn=None)
        if dueling:
            with tf.variable_scope("state_value"):
                state_out = out
                for hidden in hiddens:
                    state_out = layers.fully_connected(state_out, num_outputs=hidden, activation_fn=tf.nn.relu)
                state_score = layers.fully_connected(state_out, num_outputs=1, activation_fn=None)
            # Dueling aggregation: Q = V + (A - mean(A))
            action_scores_mean = tf.reduce_mean(action_scores, 1)
            action_scores_centered = action_scores - tf.expand_dims(action_scores_mean, 1)
            return state_score + action_scores_centered
        else:
            return action_scores
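# A minimal usage sketch (illustrative, not from the original project): build a
# dueling Q-network over Atari-style 84x84x4 observations with the Nature DQN
# conv spec.
obs = tf.placeholder(tf.float32, [None, 84, 84, 4], name="observation")
q_values = _cnn_to_mlp(convs=[(32, 8, 4), (64, 4, 2), (64, 3, 1)],  # (num_outputs, kernel_size, stride)
                       hiddens=[512],
                       dueling=True,
                       inpt=obs,
                       num_actions=6,
                       scope="q_func")
# q_values: a [batch_size, num_actions] tensor of Q-value estimates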
def Dense_net(self, input_x):
    x = conv_layer(input_x, filter=2 * self.filters, kernel=[7, 7], stride=2, layer_name='conv0')
    x = Max_Pooling(x, pool_size=[3, 3], stride=2)

    for i in range(self.nb_blocks):
        # 6 -> 12 -> 48
        x = self.dense_block(input_x=x, nb_layers=4, layer_name='dense_' + str(i))
        x = self.transition_layer(x, scope='trans_' + str(i))

    """
    x = self.dense_block(input_x=x, nb_layers=6, layer_name='dense_1')
    x = self.transition_layer(x, scope='trans_1')

    x = self.dense_block(input_x=x, nb_layers=12, layer_name='dense_2')
    x = self.transition_layer(x, scope='trans_2')

    x = self.dense_block(input_x=x, nb_layers=48, layer_name='dense_3')
    x = self.transition_layer(x, scope='trans_3')
    """

    x = self.dense_block(input_x=x, nb_layers=32, layer_name='dense_final')

    # 100 Layer
    x = Batch_Normalization(x, training=self.training, scope='linear_batch')
    x = Relu(x)
    x = Global_Average_Pooling(x)
    x = flatten(x)
    x = Linear(x)

    # x = tf.reshape(x, [-1, 10])
    return x
def Dense_net(self, input_x):
    x = conv_layer(input_x, filter=2 * self.filters, kernel=[7, 7], stride=2, layer_name='conv0')
    # x = Max_Pooling(x, pool_size=[3, 3], stride=2)

    """
    for i in range(self.nb_blocks):
        # 6 -> 12 -> 48
        x = self.dense_block(input_x=x, nb_layers=4, layer_name='dense_' + str(i))
        x = self.transition_layer(x, scope='trans_' + str(i))
    """

    x = self.dense_block(input_x=x, nb_layers=6, layer_name='dense_1')
    x = self.transition_layer(x, scope='trans_1')

    x = self.dense_block(input_x=x, nb_layers=12, layer_name='dense_2')
    x = self.transition_layer(x, scope='trans_2')

    x = self.dense_block(input_x=x, nb_layers=48, layer_name='dense_3')
    x = self.transition_layer(x, scope='trans_3')

    x = self.dense_block(input_x=x, nb_layers=32, layer_name='dense_final')

    # 100 Layer
    x = Batch_Normalization(x, training=self.training, scope='linear_batch')
    x = Relu(x)
    x = Global_Average_Pooling(x)
    x = flatten(x)
    x = Linear(x)

    # x = tf.reshape(x, [-1, 10])
    return x
def dueling_model(img_in, num_actions, scope, reuse=False, layer_norm=False):
    """As described in https://arxiv.org/abs/1511.06581"""
    with tf.variable_scope(scope, reuse=reuse):
        out = img_in
        with tf.variable_scope("convnet"):
            # original architecture
            out = layers.convolution2d(out, num_outputs=32, kernel_size=8, stride=4, activation_fn=tf.nn.relu)
            out = layers.convolution2d(out, num_outputs=64, kernel_size=4, stride=2, activation_fn=tf.nn.relu)
            out = layers.convolution2d(out, num_outputs=64, kernel_size=3, stride=1, activation_fn=tf.nn.relu)
        conv_out = layers.flatten(out)
        with tf.variable_scope("state_value"):
            state_hidden = layers.fully_connected(conv_out, num_outputs=512, activation_fn=None)
            if layer_norm:
                state_hidden = layer_norm_fn(state_hidden, relu=True)
            else:
                state_hidden = tf.nn.relu(state_hidden)
            state_score = layers.fully_connected(state_hidden, num_outputs=1, activation_fn=None)
        with tf.variable_scope("action_value"):
            actions_hidden = layers.fully_connected(conv_out, num_outputs=512, activation_fn=None)
            if layer_norm:
                actions_hidden = layer_norm_fn(actions_hidden, relu=True)
            else:
                actions_hidden = tf.nn.relu(actions_hidden)
            action_scores = layers.fully_connected(actions_hidden, num_outputs=num_actions, activation_fn=None)
            action_scores_mean = tf.reduce_mean(action_scores, 1)
            action_scores = action_scores - tf.expand_dims(action_scores_mean, 1)
        return state_score + action_scores
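# The last three lines of dueling_model implement the aggregation from the
# paper cited in its docstring:
#
#     Q(s, a) = V(s) + A(s, a) - (1/|A|) * sum_a' A(s, a')
#
# Subtracting the mean advantage keeps V and A identifiable: adding a constant
# to every advantage and subtracting it from V would otherwise leave Q unchanged.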
def _cnn_to_mlp(convs, hiddens, dueling, inpt, num_actions, scope, reuse=False, layer_norm=False):
    with tf.variable_scope(scope, reuse=reuse):
        out = inpt
        with tf.variable_scope("convnet"):
            for num_outputs, kernel_size, stride in convs:
                out = layers.convolution2d(out,
                                           num_outputs=num_outputs,
                                           kernel_size=kernel_size,
                                           stride=stride,
                                           activation_fn=tf.nn.relu)
        conv_out = layers.flatten(out)
        with tf.variable_scope("action_value"):
            action_out = conv_out
            for hidden in hiddens:
                action_out = layers.fully_connected(action_out, num_outputs=hidden, activation_fn=None)
                if layer_norm:
                    action_out = layers.layer_norm(action_out, center=True, scale=True)
                action_out = tf.nn.relu(action_out)
            action_scores = layers.fully_connected(action_out, num_outputs=num_actions, activation_fn=None)
        if dueling:
            with tf.variable_scope("state_value"):
                state_out = conv_out
                for hidden in hiddens:
                    state_out = layers.fully_connected(state_out, num_outputs=hidden, activation_fn=None)
                    if layer_norm:
                        state_out = layers.layer_norm(state_out, center=True, scale=True)
                    state_out = tf.nn.relu(state_out)
                state_score = layers.fully_connected(state_out, num_outputs=1, activation_fn=None)
            action_scores_mean = tf.reduce_mean(action_scores, 1)
            action_scores_centered = action_scores - tf.expand_dims(action_scores_mean, 1)
            q_out = state_score + action_scores_centered
        else:
            q_out = action_scores
        return q_out
def model(H, x, training):
    net = dropout(x, 0.5, is_training=training)
    # net = conv2d(net, 64, [3, 3], activation_fn=tf.nn.relu)
    # net = conv2d(net, 64, [3, 3], activation_fn=tf.nn.relu)
    # net = max_pool2d(net, [2, 2], padding='VALID')
    # net = conv2d(net, 128, [3, 3], activation_fn=tf.nn.relu)
    # net = conv2d(net, 128, [3, 3], activation_fn=tf.nn.relu)
    # net = max_pool2d(net, [2, 2], padding='VALID')
    # ksize = net.get_shape().as_list()
    # net = max_pool2d(net, [ksize[1], ksize[2]])
    net = fully_connected(flatten(net), 256, activation_fn=tf.nn.relu)
    net = dropout(net, 0.5, is_training=training)
    # Despite the name, this is a sigmoid probability, not a raw logit.
    logits = fully_connected(net, 1, activation_fn=tf.nn.sigmoid)
    preds = tf.cast(tf.greater(logits, 0.5), tf.int64)
    return logits, preds
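# Hypothetical usage sketch for model() above; the input shape and the empty
# hyperparameter container H are illustrative. Assumes
# `from tensorflow.contrib.layers import dropout, flatten, fully_connected`.
images = tf.placeholder(tf.float32, [None, 32, 32, 3])
is_training = tf.placeholder(tf.bool)
probs, preds = model(H={}, x=images, training=is_training)
# probs: [batch, 1] sigmoid outputs; preds: [batch, 1] 0/1 predictions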
def atari_model(img_in, num_actions, scope, reuse=False):
    # as described in https://storage.googleapis.com/deepmind-data/assets/papers/DeepMindNature14236Paper.pdf
    with tf.variable_scope(scope, reuse=reuse):
        out = img_in
        with tf.variable_scope("convnet"):
            # original architecture
            out = layers.convolution2d(out, num_outputs=32, kernel_size=8, stride=4, activation_fn=tf.nn.relu)
            out = layers.convolution2d(out, num_outputs=64, kernel_size=4, stride=2, activation_fn=tf.nn.relu)
            out = layers.convolution2d(out, num_outputs=64, kernel_size=3, stride=1, activation_fn=tf.nn.relu)
        out = layers.flatten(out)
        with tf.variable_scope("action_value"):
            out = layers.fully_connected(out, num_outputs=512, activation_fn=tf.nn.relu)
            out = layers.fully_connected(out, num_outputs=num_actions, activation_fn=None)
        return out
def delling_network():
    """ Architecture according to Duelling DQN:
    https://arxiv.org/abs/1511.06581
    """
    # `tt` is the model-decorator framework used by the original project.
    @tt.model(tracker=tf.train.ExponentialMovingAverage(1 - .0005),  # TODO: replace with original weight freeze
              optimizer=tf.train.RMSPropOptimizer(6.25e-5, .95, .95, .01))
    def q_network(x):
        x /= 255
        x = layers.conv2d(x, 32, 8, 4)
        x = layers.conv2d(x, 64, 4, 2)
        x = layers.conv2d(x, 64, 3, 1)
        x = layers.flatten(x)
        xv = layers.fully_connected(x, 512)
        val = layers.fully_connected(xv, 1, activation_fn=None)
        # val = tf.squeeze(val, 1)
        xa = layers.fully_connected(x, 512)
        adv = layers.fully_connected(xa, env.action_space.n, activation_fn=None)
        q = val + adv - tf.reduce_mean(adv, axis=1, keep_dims=True)
        q = tf.identity(q, name='Q')
        return q

# Tests
def flatten(self, x_tensor):
    # Collapse every non-batch dimension: [batch, d1, d2, ...] -> [batch, d1*d2*...].
    mult = 1
    for a in range(1, len(x_tensor.shape)):
        mult = mult * int(x_tensor.shape[a])
    return tf.reshape(x_tensor, [-1, mult])
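# A quick sanity-check sketch of the reshape above; `net` stands in for a
# hypothetical instance of the surrounding class.
x = tf.zeros([8, 4, 4, 3])
flat = net.flatten(x)
print(flat.shape)  # (8, 48): 4 * 4 * 3 elements per example, like layers.flatten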
def convolve_and_collect(self, fmap, name, y_box_coords, y_class, phase):
    # Apply two 3x3 convolutions to get per-location predictions for box
    # coordinates and classes, then flatten and collect them.
    b = self.conv_layer_optional_pooling(fmap, 4 * self.num_default_boxes, (3, 3), (1, 1),
                                         name + "box_coords", phase, padding_type="SAME")
    flat_b = self.flatten(b)
    print(" =====> ", name + "box_coords", flat_b)
    y_box_coords.insert(0, flat_b)
    c = self.conv_layer_optional_pooling(fmap, self.num_classes * self.num_default_boxes, (3, 3), (1, 1),
                                         name + "class", phase, padding_type="SAME")
    flat_c = self.flatten(c)
    print(" =====> ", name + "class", flat_c)
    y_class.insert(0, flat_c)
def to_trans(input):
    if len(input.get_shape()) == 4:
        input = layers.flatten(input)
    num_inputs = input.get_shape()[1]
    W_init = tf.constant_initializer(np.zeros((num_inputs, 2)))
    b_init = tf.constant_initializer(np.array([0., 0.]))
    return layers.fully_connected(input, 2,
                                  weights_initializer=W_init,
                                  biases_initializer=b_init)
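# With zero-initialized weights and biases, to_trans predicts the (0, 0)
# translation for every input at initialization, the usual identity-transform
# start for spatial transformer networks. Note that fully_connected defaults to
# a ReLU activation here; pass activation_fn=None if negative translations must
# be representable. A hypothetical call:
feat = tf.zeros([4, 128])
trans = to_trans(feat)  # shape [4, 2]; all zeros before training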
def make_dqn_body_nature(input_layer, trainable=True):
    end_points = {}
    net = layers.conv2d(inputs=input_layer,
                        num_outputs=32,
                        kernel_size=[8, 8],
                        stride=[4, 4],
                        activation_fn=tf.nn.relu,
                        padding="SAME",
                        scope="conv1",
                        trainable=trainable)
    end_points['conv1'] = net
    net = layers.conv2d(inputs=net,
                        num_outputs=64,
                        kernel_size=[4, 4],
                        stride=[2, 2],
                        activation_fn=tf.nn.relu,
                        padding="SAME",
                        scope="conv2",
                        trainable=trainable)
    end_points['conv2'] = net
    net = layers.conv2d(inputs=net,
                        num_outputs=64,
                        kernel_size=[3, 3],
                        stride=[1, 1],
                        activation_fn=tf.nn.relu,
                        padding="SAME",
                        scope="conv3",
                        trainable=trainable)
    end_points['conv3'] = net
    out = layers.flatten(net)
    end_points['conv3_flatten'] = out
    return out, end_points
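# Hypothetical usage: attach a Q-value head to the shared convolutional body;
# the frame shape and action count are illustrative.
frames = tf.placeholder(tf.float32, [None, 84, 84, 4])
body, end_points = make_dqn_body_nature(frames)
hidden = layers.fully_connected(body, 512, activation_fn=tf.nn.relu, scope="fc1")
q_values = layers.fully_connected(hidden, 6, activation_fn=None, scope="q")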
def forward(image, num_actions):
    # Conv stack
    out = layers.convolution2d(image, num_outputs=16, kernel_size=8, stride=4, activation_fn=tf.nn.relu, scope='conv1')
    out = layers.convolution2d(out, num_outputs=32, kernel_size=4, stride=2, activation_fn=tf.nn.relu, scope='conv2')
    out = layers.flatten(out, scope='flatten')
    out = layers.fully_connected(out, num_outputs=256, activation_fn=tf.nn.relu, scope='fc1')
    action_logprobs = tf.nn.log_softmax(layers.fully_connected(out, num_outputs=num_actions, activation_fn=None, scope='fc_actor'))
    value = layers.fully_connected(out, num_outputs=1, activation_fn=None, scope='fc_critic')
    value = tf.reshape(value, [-1])
    return action_logprobs, value
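# Hypothetical usage: log-probabilities differ from logits only by a per-row
# constant, so they can be fed straight to tf.multinomial for action sampling.
obs = tf.placeholder(tf.float32, [None, 84, 84, 4])
logp, value = forward(obs, num_actions=4)
action = tf.squeeze(tf.multinomial(logp, num_samples=1), axis=1)  # [batch]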
def batch_average(x):
    '''Sums over all non-batch dimensions, then averages over the batch (first) dimension.'''
    return tf.reduce_mean(tf.reduce_sum(flatten(x), 1))
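# Worked sketch, assuming `flatten` here is tf.contrib.layers.flatten: for a
# [2, 2, 2] tensor of ones, each example sums to 4, so the batch average is 4.0.
avg = batch_average(tf.ones([2, 2, 2]))  # evaluates to 4.0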
def encZ(x, ACTIVATION):
    # Assumes tcl = tf.contrib.layers and an activate(tensor, name) helper.
    conv1 = tcl.conv2d(x, 32, 5, 2, activation_fn=tf.identity, normalizer_fn=tcl.batch_norm,
                       weights_initializer=tf.random_normal_initializer(stddev=0.02), scope='conv1')
    conv1 = activate(conv1, ACTIVATION)

    conv2 = tcl.conv2d(conv1, 64, 5, 2, activation_fn=tf.identity, normalizer_fn=tcl.batch_norm,
                       weights_initializer=tf.random_normal_initializer(stddev=0.02), scope='conv2')
    conv2 = activate(conv2, ACTIVATION)

    conv3 = tcl.conv2d(conv2, 128, 5, 2, activation_fn=tf.identity, normalizer_fn=tcl.batch_norm,
                       weights_initializer=tf.random_normal_initializer(stddev=0.02), scope='conv3')
    conv3 = activate(conv3, ACTIVATION)

    conv4 = tcl.conv2d(conv3, 256, 5, 2, activation_fn=tf.identity, normalizer_fn=tcl.batch_norm,
                       weights_initializer=tf.random_normal_initializer(stddev=0.02), scope='conv4')
    conv4 = activate(conv4, ACTIVATION)

    conv4_flat = tcl.flatten(conv4)

    fc1 = tcl.fully_connected(conv4_flat, 4096, activation_fn=tf.identity, normalizer_fn=tcl.batch_norm,
                              weights_initializer=tf.random_normal_initializer(stddev=0.02), scope='fc1')
    fc1 = activate(fc1, ACTIVATION)
    # fc1 = tcl.dropout(fc1, 0.5)

    fc2 = tcl.fully_connected(fc1, 100, activation_fn=tf.identity,
                              weights_initializer=tf.random_normal_initializer(stddev=0.02), scope='fc2')

    print('input:', x)
    print('conv1:', conv1)
    print('conv2:', conv2)
    print('conv3:', conv3)
    print('conv4:', conv4)
    print('fc1:', fc1)
    print('fc2:', fc2)
    print('END ENCODER\n')

    tf.add_to_collection('vars', conv1)
    tf.add_to_collection('vars', conv2)
    tf.add_to_collection('vars', conv3)
    tf.add_to_collection('vars', conv4)
    tf.add_to_collection('vars', fc1)
    tf.add_to_collection('vars', fc2)
    return fc2
def encZ(x, ACTIVATION):
    # Variant of encZ above with batch norm on fc2 as well.
    # Assumes tcl = tf.contrib.layers and an activate(tensor, name) helper.
    conv1 = tcl.conv2d(x, 32, 5, 2, activation_fn=tf.identity, normalizer_fn=tcl.batch_norm,
                       weights_initializer=tf.random_normal_initializer(stddev=0.02), scope='conv1')
    conv1 = activate(conv1, ACTIVATION)

    conv2 = tcl.conv2d(conv1, 64, 5, 2, activation_fn=tf.identity, normalizer_fn=tcl.batch_norm,
                       weights_initializer=tf.random_normal_initializer(stddev=0.02), scope='conv2')
    conv2 = activate(conv2, ACTIVATION)

    conv3 = tcl.conv2d(conv2, 128, 5, 2, activation_fn=tf.identity, normalizer_fn=tcl.batch_norm,
                       weights_initializer=tf.random_normal_initializer(stddev=0.02), scope='conv3')
    conv3 = activate(conv3, ACTIVATION)

    conv4 = tcl.conv2d(conv3, 256, 5, 2, activation_fn=tf.identity, normalizer_fn=tcl.batch_norm,
                       weights_initializer=tf.random_normal_initializer(stddev=0.02), scope='conv4')
    conv4 = activate(conv4, ACTIVATION)

    conv4_flat = tcl.flatten(conv4)

    fc1 = tcl.fully_connected(conv4_flat, 4096, activation_fn=tf.identity, normalizer_fn=tcl.batch_norm,
                              weights_initializer=tf.random_normal_initializer(stddev=0.02), scope='fc1')
    fc1 = activate(fc1, ACTIVATION)

    fc2 = tcl.fully_connected(fc1, 100, activation_fn=tf.identity, normalizer_fn=tcl.batch_norm,
                              weights_initializer=tf.random_normal_initializer(stddev=0.02), scope='fc2')

    print('input:', x)
    print('conv1:', conv1)
    print('conv2:', conv2)
    print('conv3:', conv3)
    print('conv4:', conv4)
    print('fc1:', fc1)
    print('fc2:', fc2)
    print('END ENCODER\n')

    tf.add_to_collection('vars', conv1)
    tf.add_to_collection('vars', conv2)
    tf.add_to_collection('vars', conv3)
    tf.add_to_collection('vars', conv4)
    tf.add_to_collection('vars', fc1)
    tf.add_to_collection('vars', fc2)
    return fc2