def get_weight_variable(shape, name=None, type='xavier_uniform', regularize=True, **kwargs):
    initialise_from_constant = False
    if type == 'xavier_uniform':
        initial = xavier_initializer(uniform=True, dtype=tf.float32)
    elif type == 'xavier_normal':
        initial = xavier_initializer(uniform=False, dtype=tf.float32)
    elif type == 'he_normal':
        initial = variance_scaling_initializer(uniform=False, factor=2.0, mode='FAN_IN', dtype=tf.float32)
    elif type == 'he_uniform':
        initial = variance_scaling_initializer(uniform=True, factor=2.0, mode='FAN_IN', dtype=tf.float32)
    elif type == 'caffe_uniform':
        initial = variance_scaling_initializer(uniform=True, factor=1.0, mode='FAN_IN', dtype=tf.float32)
    elif type == 'simple':
        stddev = kwargs.get('stddev', 0.02)
        initial = tf.truncated_normal(shape, stddev=stddev, dtype=tf.float32)
        initialise_from_constant = True
    elif type == 'bilinear':
        weights = _bilinear_upsample_weights(shape)
        initial = tf.constant(weights, shape=shape, dtype=tf.float32)
        initialise_from_constant = True
    else:
        raise ValueError('Unknown initialisation requested: %s' % type)
    if name is None:  # This keeps the option open to use unnamed Variables
        weight = tf.Variable(initial)
    else:
        if initialise_from_constant:
            weight = tf.get_variable(name, initializer=initial)
        else:
            weight = tf.get_variable(name, shape=shape, initializer=initial)
    if regularize:
        tf.add_to_collection('weight_variables', weight)
    return weight
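# Usage sketch (added for illustration, not part of the original file). It assumes
# TensorFlow 1.x, `import tensorflow as tf`, and that xavier_initializer /
# variance_scaling_initializer have been imported from tf.contrib.layers.
with tf.variable_scope('conv1'):
    # 3x3 kernel, 16 input channels, 32 output channels, He-normal initialisation
    w = get_weight_variable([3, 3, 16, 32], name='W', type='he_normal')
    # unnamed variable initialised from a truncated normal with stddev 0.05
    w_simple = get_weight_variable([3, 3, 16, 32], type='simple', stddev=0.05)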
def BN_ReLU(self, net):
    """Batch Normalization and ReLU."""
    # 'gamma' is not used as the next layer is ReLU
    net = batch_norm(net,
                     center=True,
                     scale=False,
                     activation_fn=tf.nn.relu)
    self._activation_summary(net)
    return net
# def conv2d(self, net, num_ker, ker_size, stride):
def conv1d(self, net, num_ker, ker_size, stride):
    # 1D-convolution
    net = convolution2d(
        net,
        num_outputs=num_ker,
        kernel_size=[ker_size, 1],
        stride=[stride, 1],
        padding='SAME',
        activation_fn=None,
        normalizer_fn=None,
        weights_initializer=variance_scaling_initializer(),
        weights_regularizer=l2_regularizer(self.weight_decay),
        biases_initializer=tf.zeros_initializer)
    return net
def get_arg_scope(is_training):
    weight_decay_l2 = 0.1
    batch_norm_decay = 0.999
    batch_norm_epsilon = 0.0001
    with slim.arg_scope([slim.conv2d, slim.fully_connected, layers.separable_convolution2d],
                        weights_regularizer=slim.l2_regularizer(weight_decay_l2),
                        biases_regularizer=slim.l2_regularizer(weight_decay_l2),
                        weights_initializer=layers.variance_scaling_initializer()):
        batch_norm_params = {
            'decay': batch_norm_decay,
            'epsilon': batch_norm_epsilon
        }
        with slim.arg_scope([slim.batch_norm, slim.dropout],
                            is_training=is_training):
            with slim.arg_scope([slim.batch_norm],
                                **batch_norm_params):
                with slim.arg_scope([slim.conv2d, layers.separable_convolution2d, layers.fully_connected],
                                    activation_fn=tf.nn.elu,
                                    normalizer_fn=slim.batch_norm,
                                    normalizer_params=batch_norm_params) as scope:
                    return scope
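# Usage sketch (added, hedged): the returned scope can be re-entered with
# slim.arg_scope so conv/fc layers pick up the ELU activation, batch norm and L2
# regularisation. `images` is a hypothetical input tensor; assumes TF 1.x with
# tf.contrib.slim imported as `slim`.
images = tf.placeholder(tf.float32, [None, 64, 64, 3])
with slim.arg_scope(get_arg_scope(is_training=True)):
    net = slim.conv2d(images, 32, [3, 3], scope='conv1')
    net = slim.fully_connected(slim.flatten(net), 10, activation_fn=None, scope='logits')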
def he_normal(seed=None, scale=1.0, dtype=tf.float32):
    """
    He Normal initializer.
    Kaiming He et al. (2015): Delving deep into rectifiers: Surpassing human-level
    performance on ImageNet classification. arXiv preprint arXiv:1502.01852.
    Args:
        scale: float
            Scaling factor for the weights. Set this to ``1.0`` for linear and
            sigmoid units, to ``sqrt(2)`` for rectified linear units, and
            to ``sqrt(2/(1+alpha**2))`` for leaky rectified linear units with
            leakiness ``alpha``. Other transfer functions may need different factors.
    """
    return variance_scaling_initializer(factor=2.0 * scale, mode='FAN_IN',
                                        uniform=False, seed=seed, dtype=dtype)
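# Added note: with the default scale=1.0 the variance-scaling factor is 2.0, i.e.
# the He initialisation commonly used for ReLU units (stddev on the order of
# sqrt(2 / fan_in)). A minimal, hypothetical usage:
w = tf.get_variable('fc1_w', shape=[512, 256], initializer=he_normal(seed=42))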
def resnet_arg_scope(
        weight_decay=0.0001,
        batch_norm_decay=0.997,
        batch_norm_epsilon=1e-5,
        batch_norm_scale=True):
    batch_norm_params = {
        'decay': batch_norm_decay,
        'epsilon': batch_norm_epsilon,
        'scale': batch_norm_scale,
    }
    l2_regularizer = layers.l2_regularizer(weight_decay)
    arg_scope_layers = arg_scope(
        [layers.conv2d, my_layers.preact_conv2d, layers.fully_connected],
        weights_initializer=layers.variance_scaling_initializer(),
        weights_regularizer=l2_regularizer,
        activation_fn=tf.nn.relu)
    arg_scope_conv = arg_scope(
        [layers.conv2d, my_layers.preact_conv2d],
        normalizer_fn=layers.batch_norm,
        normalizer_params=batch_norm_params)
    with arg_scope_layers, arg_scope_conv as arg_sc:
        return arg_sc
def conv1d(self, net, num_ker, ker_size, stride):
    # 1D-convolution
    net = convolution2d(
        net,
        num_outputs=num_ker,
        kernel_size=[ker_size, 1],
        stride=[stride, 1],
        padding='SAME',
        activation_fn=None,
        normalizer_fn=None,
        weights_initializer=variance_scaling_initializer(),
        weights_regularizer=l2_regularizer(self.weight_decay),
        biases_initializer=tf.zeros_initializer)
    return net
def he_normal(seed=None, scale=1.0, dtype=tf.float32):
    return variance_scaling_initializer(factor=2.0 * scale, mode='FAN_IN',
                                        uniform=False, seed=seed, dtype=dtype)
def he_uniform(seed=None, scale=1.0, dtype=tf.float32):
    return variance_scaling_initializer(factor=2.0 * scale, mode='FAN_IN',
                                        uniform=True, seed=seed, dtype=dtype)
# Code borrowed from Lasagne https://github.com/Lasagne/Lasagne under MIT license
def conv2d(self, net, num_ker, ker_size, stride):
    net = convolution2d(
        net,
        num_outputs=num_ker,
        kernel_size=[ker_size, ker_size],
        stride=[stride, stride],
        padding='SAME',
        activation_fn=None,
        normalizer_fn=None,
        weights_initializer=variance_scaling_initializer(),
        weights_regularizer=l2_regularizer(FLAGS.weight_decay),
        biases_initializer=tf.zeros_initializer)
    return net
def he_uniform(seed=None, scale=1.0, dtype=tf.float32):
    """
    He Uniform initializer.
    Args:
        scale: float
            Scaling factor for the weights. Set this to ``1.0`` for linear and
            sigmoid units, to ``sqrt(2)`` for rectified linear units, and
            to ``sqrt(2/(1+alpha**2))`` for leaky rectified linear units with
            leakiness ``alpha``. Other transfer functions may need different factors.
    """
    return variance_scaling_initializer(factor=2.0 * scale, mode='FAN_IN',
                                        uniform=True, seed=seed, dtype=dtype)
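# Added note: the uniform variant with the default scale=1.0 should sample from
# roughly [-sqrt(6 / fan_in), sqrt(6 / fan_in)], the usual "He uniform" range.
# A minimal, hypothetical usage:
w = tf.get_variable('conv1_w', shape=[3, 3, 64, 128], initializer=he_uniform(seed=0))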
def pytorch_initializer(uniform=True, seed=None, dtype=dtypes.float32):
    return variance_scaling_initializer(factor=1./3, mode='FAN_IN', uniform=uniform, seed=seed, dtype=dtype)
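# Added note: with mode='FAN_IN', factor=1/3 and uniform sampling, the bound works
# out to sqrt(3 * (1/3) / fan_in) = 1 / sqrt(fan_in), i.e. the same
# U(-1/sqrt(fan_in), 1/sqrt(fan_in)) range PyTorch uses by default for Linear and
# Conv weights. Hypothetical usage:
w = tf.get_variable('w', shape=[128, 64], initializer=pytorch_initializer())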
def vgg_arg_scope(
        weight_decay=0.0005,
        use_batch_norm=False):
    """Defines the VGG arg scope."""
    batch_norm_params = {
        # Decay for the moving averages.
        'decay': 0.9997,
        # epsilon to prevent 0s in variance.
        'epsilon': 0.001,
    }
    normalizer_fn = layers.batch_norm if use_batch_norm else None
    normalizer_params = batch_norm_params if use_batch_norm else None
    l2_regularizer = layers.l2_regularizer(weight_decay)  # 0.00004
    with arg_scope(
            [layers.fully_connected],
            biases_initializer=tf.constant_initializer(0.1),
            weights_initializer=layers.variance_scaling_initializer(factor=1.0),
            weights_regularizer=l2_regularizer,
            activation_fn=tf.nn.relu):
        with arg_scope(
                [layers.conv2d],
                normalizer_fn=normalizer_fn,
                normalizer_params=normalizer_params,
                weights_initializer=layers.variance_scaling_initializer(factor=1.0),
                weights_regularizer=l2_regularizer,
                activation_fn=tf.nn.relu) as arg_sc:
            return arg_sc
def inception_arg_scope(
        weight_decay=0.00004,
        use_batch_norm=True,
        batch_norm_decay=0.9997,
        batch_norm_epsilon=0.001):
    # Parameters for BatchNorm.
    batch_norm_params = {
        # Decay for the moving averages.
        'decay': batch_norm_decay,
        # epsilon to prevent 0s in variance.
        'epsilon': batch_norm_epsilon,
    }
    if use_batch_norm:
        normalizer_fn = layers.batch_norm
        normalizer_params = batch_norm_params
    else:
        normalizer_fn = None
        normalizer_params = {}
    # Set weight_decay for weights in Conv and FC layers.
    l2_regularizer = layers.l2_regularizer(weight_decay)
    activation_fn = tf.nn.relu  # tf.nn.elu
    arg_scope_weights = arg_scope(
        [layers.conv2d, layers.fully_connected],
        weights_initializer=layers.variance_scaling_initializer(factor=1.0),
        weights_regularizer=l2_regularizer)
    arg_scope_conv = arg_scope(
        [layers.conv2d],
        activation_fn=activation_fn,
        normalizer_fn=normalizer_fn,
        normalizer_params=normalizer_params)
    with arg_scope_weights, arg_scope_conv as arg_sc:
        return arg_sc
def dense(x,
          num_outputs,
          scope=None,
          activation=None,
          reuse=None,
          bn=False,
          post_bn=False,
          phase=None):
    with tf.variable_scope(scope, 'dense', reuse=reuse):
        # convert x to a 2-D tensor
        dim = np.prod(x._shape_as_list()[1:])
        x = tf.reshape(x, [-1, dim])
        weights_shape = (x.get_shape().dims[-1], num_outputs)
        # dense layer
        weights = tf.get_variable('weights', weights_shape,
                                  initializer=variance_scaling_initializer())
        biases = tf.get_variable('biases', [num_outputs],
                                 initializer=tf.zeros_initializer)
        output = tf.matmul(x, weights) + biases
        if bn: output = batch_norm(output, phase, scope='bn')
        if activation: output = activation(output)
        if post_bn: output = batch_norm(output, phase, scope='post_bn')
        return output
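# Usage sketch (added, hypothetical): a small two-layer head built with the helper
# above; `features` stands in for some upstream feature map.
features = tf.placeholder(tf.float32, [None, 7, 7, 64])
h = dense(features, 256, scope='fc1', activation=tf.nn.relu)
logits = dense(h, 10, scope='fc2')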
def conv2d(x,
           num_outputs,
           kernel_size,
           strides,
           padding='SAME',
           activation=None,
           bn=False,
           post_bn=False,
           phase=None,
           scope=None,
           reuse=None):
    # Convert int to list
    kernel_size = [kernel_size] * 2 if isinstance(kernel_size, int) else kernel_size
    strides = [strides] * 2 if isinstance(strides, int) else strides
    # Build the full kernel spec ([h, w, in_channels, out_channels]) and NHWC strides
    kernel_size = list(kernel_size) + [x.get_shape().dims[-1], num_outputs]
    strides = [1] + list(strides) + [1]
    # Conv operation
    with tf.variable_scope(scope, 'conv2d', reuse=reuse):
        kernel = tf.get_variable('weights', kernel_size,
                                 initializer=variance_scaling_initializer())
        biases = tf.get_variable('biases', [num_outputs],
                                 initializer=tf.zeros_initializer)
        output = tf.nn.conv2d(x, kernel, strides, padding, name='conv2d')
        output += biases
        if bn: output = batch_norm(output, phase, scope='bn')
        if activation: output = activation(output)
        if post_bn: output = batch_norm(output, phase, scope='post_bn')
        return output
def resnn(self, image_batch):
    """Build the resnn model.
    Args:
        image_batch: Sequences returned from inputs_train() or inputs_eval().
    Returns:
        Logits.
    """
    # First convolution
    with tf.variable_scope('conv_layer1'):
        net = self.conv2d(image_batch, self.groups[0].num_ker, 5, 1)
        net = self.BN_ReLU(net)
    # Max pool
    if FLAGS.max_pool:
        net = tf.nn.max_pool(net,
                             [1, 3, 3, 1],
                             strides=[1, 1, 1, 1],
                             padding='SAME')
    # stacking Residual Units
    for group_i, group in enumerate(self.groups):
        for unit_i in range(group.num_units):
            net = self.residual_unit(net, group_i, unit_i)
    # an extra activation before average pooling
    if FLAGS.special_first:
        with tf.variable_scope('special_BN_ReLU'):
            net = self.BN_ReLU(net)
    # padding should be VALID for global average pooling
    # output: batch*1*1*channels
    net_shape = net.get_shape().as_list()
    net = tf.nn.avg_pool(net,
                         ksize=[1, net_shape[1], net_shape[2], 1],
                         strides=[1, 1, 1, 1],
                         padding='VALID')
    net_shape = net.get_shape().as_list()
    softmax_len = net_shape[1] * net_shape[2] * net_shape[3]
    net = tf.reshape(net, [-1, softmax_len])
    # add dropout
    if FLAGS.dropout:
        with tf.name_scope("dropout"):
            net = tf.nn.dropout(net, FLAGS.dropout_keep_prob)
    # fully connected layer producing the class logits
    with tf.variable_scope('FC-layer'):
        net = fully_connected(
            net,
            num_outputs=FLAGS.num_cats,
            activation_fn=None,
            normalizer_fn=None,
            weights_initializer=variance_scaling_initializer(),
            weights_regularizer=l2_regularizer(FLAGS.weight_decay),
            biases_initializer=tf.zeros_initializer)
    return net
def conv2d_transpose(x,
                     num_outputs,
                     kernel_size,
                     strides,
                     padding='SAME',
                     output_shape=None,
                     output_like=None,
                     activation=None,
                     bn=False,
                     post_bn=False,
                     phase=None,
                     scope=None,
                     reuse=None):
    # Convert int to list
    kernel_size = [kernel_size] * 2 if isinstance(kernel_size, int) else kernel_size
    strides = [strides] * 2 if isinstance(strides, int) else strides
    # Build the full kernel spec ([h, w, out_channels, in_channels]) and NHWC strides
    kernel_size = list(kernel_size) + [num_outputs, x.get_shape().dims[-1]]
    strides = [1] + list(strides) + [1]
    # Get the output shape both as a tensor and as a static list
    if output_shape:
        bs = tf.shape(x)[0]
        _output_shape = tf.stack([bs] + output_shape[1:])
    elif output_like:
        _output_shape = tf.shape(output_like)
        output_shape = output_like.get_shape()
    else:
        assert padding == 'SAME', "Shape inference is only applicable when padding is 'SAME'"
        bs, h, w, c = x._shape_as_list()
        bs_tf = tf.shape(x)[0]
        _output_shape = tf.stack([bs_tf, strides[1] * h, strides[2] * w, num_outputs])
        output_shape = [bs, strides[1] * h, strides[2] * w, num_outputs]
    # Transposed conv operation
    with tf.variable_scope(scope, 'conv2d', reuse=reuse):
        kernel = tf.get_variable('weights', kernel_size,
                                 initializer=variance_scaling_initializer())
        biases = tf.get_variable('biases', [num_outputs],
                                 initializer=tf.zeros_initializer)
        output = tf.nn.conv2d_transpose(x, kernel, _output_shape, strides,
                                        padding, name='conv2d_transpose')
        output += biases
        output.set_shape(output_shape)
        if bn: output = batch_norm(output, phase, scope='bn')
        if activation: output = activation(output)
        if post_bn: output = batch_norm(output, phase, scope='post_bn')
        return output
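# Usage sketch (added, hypothetical): 2x upsampling with padding='SAME', first
# letting the helper infer the output shape from the stride, then matching the
# shape of an existing skip-connection tensor via output_like.
feat = tf.placeholder(tf.float32, [None, 16, 16, 64])
up = conv2d_transpose(feat, num_outputs=32, kernel_size=4, strides=2, scope='up1')
skip = tf.placeholder(tf.float32, [None, 32, 32, 32])
up_like = conv2d_transpose(feat, 32, 4, 2, output_like=skip, scope='up2')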
def dense_policy_graph(inputs,
                       env_config,
                       activation_fn=tf.nn.tanh,
                       scope='policy_graph',
                       reuse=None,
                       trainable=True):
    with tf.variable_scope(scope, reuse=reuse):
        net = inputs
        net = tf.contrib.layers.flatten(net)
        net = tf.layers.dense(
            inputs=net,
            units=64,
            activation=activation_fn,
            kernel_initializer=variance_scaling_initializer(factor=1),
            trainable=trainable)
        net = tf.layers.dense(
            inputs=net,
            units=64,
            activation=activation_fn,
            kernel_initializer=variance_scaling_initializer(factor=1),
            trainable=trainable)
        if env_config['action_space'] == 'continuous':
            mean = tf.layers.dense(
                inputs=net,
                units=env_config['num_actions'],
                kernel_initializer=tf.truncated_normal_initializer(stddev=0.01),
                name='mean',
                trainable=trainable)
            logstd = tf.get_variable(
                'logstd', (1, env_config['num_actions']),
                tf.float32,
                initializer=tf.zeros_initializer(),
                trainable=trainable)
            return mean, logstd
        if env_config['action_space'] == 'discrete':
            logits = tf.layers.dense(
                inputs=net,
                units=env_config['num_actions'],
                name='logits',
                trainable=trainable)
            return logits
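# Usage sketch (added, hypothetical): a discrete-action policy head for an
# environment with an 8-dimensional observation and 4 actions.
env_config = {'action_space': 'discrete', 'num_actions': 4}
obs = tf.placeholder(tf.float32, [None, 8])
logits = dense_policy_graph(obs, env_config)
action_probs = tf.nn.softmax(logits)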