def dense(x, num_units, nonlinearity=None, init_scale=1., counters={}, init=False, ema=None, **kwargs):
    ''' fully connected layer '''
    name = get_name('dense', counters)
    with tf.variable_scope(name):
        if init:
            # data based initialization of parameters
            V = tf.get_variable('V', [int(x.get_shape()[1]), num_units], tf.float32,
                                tf.random_normal_initializer(0, 0.05), trainable=True)
            V_norm = tf.nn.l2_normalize(V.initialized_value(), [0])
            x_init = tf.matmul(x, V_norm)
            m_init, v_init = tf.nn.moments(x_init, [0])
            scale_init = init_scale / tf.sqrt(v_init + 1e-10)
            g = tf.get_variable('g', dtype=tf.float32, initializer=scale_init, trainable=True)
            b = tf.get_variable('b', dtype=tf.float32, initializer=-m_init * scale_init, trainable=True)
            x_init = tf.reshape(scale_init, [1, num_units]) * (x_init - tf.reshape(m_init, [1, num_units]))
            if nonlinearity is not None:
                x_init = nonlinearity(x_init)
            return x_init
        else:
            V, g, b = get_vars_maybe_avg(['V', 'g', 'b'], ema)
            tf.assert_variables_initialized([V, g, b])
            # use weight normalization (Salimans & Kingma, 2016)
            x = tf.matmul(x, V)
            scaler = g / tf.sqrt(tf.reduce_sum(tf.square(V), [0]))
            x = tf.reshape(scaler, [1, num_units]) * x + tf.reshape(b, [1, num_units])
            # apply nonlinearity
            if nonlinearity is not None:
                x = nonlinearity(x)
            return x
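# --- Note: the snippets on this page call helpers (get_name, get_vars_maybe_avg,
# int_shape) that are not shown here. Below is a minimal sketch of plausible
# implementations, following the conventions of OpenAI's pixel-cnn nn.py; treat
# the details as assumptions, not the verbatim originals. Also note that
# tf.assert_variables_initialized([...]) returns an op whose result is discarded
# in these layers, so it documents intent rather than enforcing anything unless
# the op is actually run.
def get_name(layer_name, counters):
    ''' assign unique scope names ('dense_0', 'dense_1', ...) per counters dict '''
    counters[layer_name] = counters.get(layer_name, 0)
    name = layer_name + '_' + str(counters[layer_name])
    counters[layer_name] += 1
    return name

def get_vars_maybe_avg(var_names, ema, **kwargs):
    ''' fetch variables from the current scope, Polyak-averaged if ema is given '''
    vars = []
    for vn in var_names:
        v = tf.get_variable(vn, **kwargs)
        vars.append(ema.average(v) if ema is not None else v)
    return vars

def int_shape(x):
    ''' static shape of a tensor as a list of ints '''
    return list(map(int, x.get_shape()))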
def conv2d(x, num_filters, filter_size=[3, 3], stride=[1, 1], pad='SAME', nonlinearity=None, init_scale=1., counters={}, init=False, ema=None, **kwargs):
    ''' convolutional layer '''
    name = get_name('conv2d', counters)
    with tf.variable_scope(name):
        if init:
            # data based initialization of parameters
            V = tf.get_variable('V', filter_size + [int(x.get_shape()[-1]), num_filters],
                                tf.float32, tf.random_normal_initializer(0, 0.05), trainable=True)
            V_norm = tf.nn.l2_normalize(V.initialized_value(), [0, 1, 2])
            x_init = tf.nn.conv2d(x, V_norm, [1] + stride + [1], pad)
            m_init, v_init = tf.nn.moments(x_init, [0, 1, 2])
            scale_init = init_scale / tf.sqrt(v_init + 1e-8)
            g = tf.get_variable('g', dtype=tf.float32, initializer=scale_init, trainable=True)
            b = tf.get_variable('b', dtype=tf.float32, initializer=-m_init * scale_init, trainable=True)
            x_init = tf.reshape(scale_init, [1, 1, 1, num_filters]) * (x_init - tf.reshape(m_init, [1, 1, 1, num_filters]))
            if nonlinearity is not None:
                x_init = nonlinearity(x_init)
            return x_init
        else:
            V, g, b = get_vars_maybe_avg(['V', 'g', 'b'], ema)
            tf.assert_variables_initialized([V, g, b])
            # use weight normalization (Salimans & Kingma, 2016)
            W = tf.reshape(g, [1, 1, 1, num_filters]) * tf.nn.l2_normalize(V, [0, 1, 2])
            # calculate convolutional layer output
            x = tf.nn.bias_add(tf.nn.conv2d(x, W, [1] + stride + [1], pad), b)
            # apply nonlinearity
            if nonlinearity is not None:
                x = nonlinearity(x)
            return x
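# --- Hypothetical usage sketch of the two-pass pattern these layers expect:
# build the layer once with init=True so that g and b get data-dependent
# initializers, then rebuild it with reuse=True and init=False for training.
# Fresh counters dicts keep the generated scope names ('conv2d_0', ...) aligned
# across the two passes. first_batch and next_batch stand in for your own data.
x_ph = tf.placeholder(tf.float32, [16, 32, 32, 3])
with tf.variable_scope('model'):
    init_out = conv2d(x_ph, 64, nonlinearity=tf.nn.relu, init=True, counters={})
with tf.variable_scope('model', reuse=True):
    train_out = conv2d(x_ph, 64, nonlinearity=tf.nn.relu, init=False, counters={})
with tf.Session() as sess:
    # the initializers of g and b depend on x_ph, so feed real data here
    sess.run(tf.global_variables_initializer(), {x_ph: first_batch})
    sess.run(train_out, {x_ph: next_batch})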
def deconv(inp, name, filter_size, out_channels, stride=1,
padding='SAME', nonlinearity=None, init_scale=1.0):
""" Deconvolution layer. See `conv`"""
with tf.variable_scope(name):
strides = [1, stride, stride, 1]
[N, H, W, in_channels] = inp.get_shape().as_list()
if padding == 'SAME':
target_shape = [N, H * stride, W * stride, out_channels]
else:
target_shape = [N, H * stride + filter_size[0] - 1, W * stride + filter_size[1] - 1, out_channels]
target_shape = tf.constant(target_shape, dtype=tf.int32)
if tf.GLOBAL['init']:
V = get_variable('V', shape=filter_size + (out_channels, in_channels), dtype=tf.float32,
initializer=tf.random_normal_initializer(0, 0.05), trainable=True)
V_norm = tf.nn.l2_normalize(V.initialized_value(), [0, 1, 3])
out = tf.nn.conv2d_transpose(inp, V_norm, target_shape, strides, padding)
m_init, v_init = tf.nn.moments(out, [0, 1, 2])
scale_init = init_scale / tf.sqrt(v_init + 1e-8)
g = get_variable('g', shape=None, dtype=tf.float32, initializer=scale_init, trainable=True, regularizer=tf.contrib.layers.l2_regularizer(tf.GLOBAL['reg']))
b = get_variable('b', shape=None, dtype=tf.float32, initializer=-m_init * scale_init, trainable=True, regularizer=tf.contrib.layers.l2_regularizer(tf.GLOBAL['reg']))
out = tf.reshape(scale_init, [1, 1, 1, out_channels]) * (out - tf.reshape(m_init, [1, 1, 1, out_channels]))
if nonlinearity is not None:
out = nonlinearity(out)
else:
V, g, b = get_variable('V'), get_variable('g'), get_variable('b')
tf.assert_variables_initialized([V, g, b])
W = g[None, None, :, None] * tf.nn.l2_normalize(V, [0, 1, 3])
out = tf.nn.conv2d_transpose(inp, W, target_shape, strides, padding) + b[None, None, None]
if nonlinearity is not None:
out = nonlinearity(out)
return out
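# --- tf.GLOBAL is not part of TensorFlow: the codebase behind deconv/conv
# evidently attaches a plain config dict to the module, and its get_variable
# appears to be a thin wrapper over tf.get_variable. A hedged reconstruction
# of that setup (the exact keys and values are assumptions):
tf.GLOBAL = {
    'init': True,   # True during the data-dependent initialization pass
    'reg': 1e-4,    # L2 regularization weight applied to g and b
}
# Caveat: tf.constant(target_shape, ...) in deconv requires a statically known
# batch size N; with a None batch dimension the output shape would have to be
# built dynamically, e.g. tf.stack([tf.shape(inp)[0], H * stride, W * stride,
# out_channels]).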
def deconv2d(x, num_filters, filter_size=[3, 3], stride=[1, 1], pad='SAME', nonlinearity=None, init_scale=1., counters={}, init=False, ema=None, **kwargs):
    ''' transposed convolutional layer '''
    name = get_name('deconv2d', counters)
    xs = int_shape(x)
    if pad == 'SAME':
        target_shape = [xs[0], xs[1] * stride[0], xs[2] * stride[1], num_filters]
    else:
        target_shape = [xs[0], xs[1] * stride[0] + filter_size[0] - 1, xs[2] * stride[1] + filter_size[1] - 1, num_filters]
    with tf.variable_scope(name):
        if init:
            # data based initialization of parameters
            V = tf.get_variable('V', filter_size + [num_filters, int(x.get_shape()[-1])],
                                tf.float32, tf.random_normal_initializer(0, 0.05), trainable=True)
            V_norm = tf.nn.l2_normalize(V.initialized_value(), [0, 1, 3])
            x_init = tf.nn.conv2d_transpose(x, V_norm, target_shape, [1] + stride + [1], padding=pad)
            m_init, v_init = tf.nn.moments(x_init, [0, 1, 2])
            scale_init = init_scale / tf.sqrt(v_init + 1e-8)
            g = tf.get_variable('g', dtype=tf.float32, initializer=scale_init, trainable=True)
            b = tf.get_variable('b', dtype=tf.float32, initializer=-m_init * scale_init, trainable=True)
            x_init = tf.reshape(scale_init, [1, 1, 1, num_filters]) * (x_init - tf.reshape(m_init, [1, 1, 1, num_filters]))
            if nonlinearity is not None:
                x_init = nonlinearity(x_init)
            return x_init
        else:
            V, g, b = get_vars_maybe_avg(['V', 'g', 'b'], ema)
            tf.assert_variables_initialized([V, g, b])
            # use weight normalization (Salimans & Kingma, 2016)
            W = tf.reshape(g, [1, 1, num_filters, 1]) * tf.nn.l2_normalize(V, [0, 1, 3])
            # calculate convolutional layer output
            x = tf.nn.conv2d_transpose(x, W, target_shape, [1] + stride + [1], padding=pad)
            x = tf.nn.bias_add(x, b)
            # apply nonlinearity
            if nonlinearity is not None:
                x = nonlinearity(x)
            return x
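# --- Hypothetical sketch of the ema plumbing used by the layers above: keep
# Polyak averages of all parameters during training, then rebuild the model
# with ema passed in so get_vars_maybe_avg() returns the averaged V, g, b at
# evaluation time. apply_grads_op and x_ph are assumed names, not from the
# original source.
ema = tf.train.ExponentialMovingAverage(decay=0.9995)
with tf.control_dependencies([apply_grads_op]):
    train_op = tf.group(ema.apply(tf.trainable_variables()))
with tf.variable_scope('model', reuse=True):
    eval_out = deconv2d(x_ph, 32, ema=ema, counters={})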
def conv(inp, name, filter_size, out_channels, stride=1,
padding='SAME', nonlinearity=None, init_scale=1.0, dilation=None):
"""Convolutional layer.
If tf.GLOBAL['init'] is true, this creates the layers paramenters (g, b, W) : L(x) = g|W| (*) x + b
Args:
x: input tensor
name (str): variable scope name
filter_size (int pair): filter size
out_channels (int): number of output channels
strid (int): horizontal and vertical stride
padding (str): padding mode
nonlinearity (func): activation function
init_scale: initial scale for the weights and bias variables
dilation: optional dilation rate
"""
with tf.variable_scope(name):
strides = [1, stride, stride, 1]
in_channels = inp.get_shape().as_list()[3]
if tf.GLOBAL['init']:
V = get_variable('V', shape=tuple(filter_size) + (in_channels, out_channels), dtype=tf.float32,
initializer=tf.random_normal_initializer(0, 0.05), trainable=True)
V_norm = tf.nn.l2_normalize(V.initialized_value(), [0, 1, 2])
if dilation is None:
out = tf.nn.conv2d(inp, V_norm, strides, padding)
else:
assert(stride == 1)
out = tf.nn.atrous_conv2d(inp, V_norm, dilation, padding)
m_init, v_init = tf.nn.moments(out, [0, 1, 2])
scale_init = init_scale / tf.sqrt(v_init + 1e-8)
g = get_variable('g', shape=None, dtype=tf.float32, initializer=scale_init, trainable=True, regularizer=tf.contrib.layers.l2_regularizer(tf.GLOBAL['reg']))
b = get_variable('b', shape=None, dtype=tf.float32, initializer=-m_init * scale_init, trainable=True, regularizer=tf.contrib.layers.l2_regularizer(tf.GLOBAL['reg']))
out = tf.reshape(scale_init, [1, 1, 1, out_channels]) * (out - tf.reshape(m_init, [1, 1, 1, out_channels]))
if nonlinearity is not None:
out = nonlinearity(out)
else:
V, g, b = get_variable('V'), get_variable('g'), get_variable('b')
tf.assert_variables_initialized([V, g, b])
W = g[None, None, None] * tf.nn.l2_normalize(V, [0, 1, 2])
if dilation is None:
out = tf.nn.conv2d(inp, W, strides, padding) + b[None, None, None]
else:
assert(stride == 1)
out = tf.nn.atrous_conv2d(inp, W, dilation, padding) + b[None, None, None]
if nonlinearity is not None:
out = nonlinearity(out)
return out
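# --- Hypothetical usage of the dilation path above: with dilation set, stride
# must be 1 (asserted in the body) and tf.nn.atrous_conv2d inserts dilation-1
# zeros between filter taps, enlarging the receptive field at constant cost.
# `images` is an assumed input tensor.
h = conv(images, 'conv_dilated', filter_size=(3, 3), out_channels=64,
         nonlinearity=tf.nn.relu, dilation=2)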
def __init__(self, session,
optimizer,
actor_network,
critic_network,
state_dim,
num_actions,
init_exp=0.1, # initial exploration prob
final_exp=0.0, # final exploration prob
anneal_steps=1000, # N steps for annealing exploration
discount_factor=0.99, # discount future rewards
reg_param=0.001, # regularization constants
max_gradient=5, # max gradient norms
summary_writer=None,
summary_every=100):
# tensorflow machinery
self.session = session
self.optimizer = optimizer
self.summary_writer = summary_writer
# model components
self.actor_network = actor_network
self.critic_network = critic_network
# training parameters
self.state_dim = state_dim
self.num_actions = num_actions
self.discount_factor = discount_factor
self.max_gradient = max_gradient
self.reg_param = reg_param
# exploration parameters
self.exploration = init_exp
self.init_exp = init_exp
self.final_exp = final_exp
self.anneal_steps = anneal_steps
# counters
self.train_iteration = 0
# rollout buffer
self.state_buffer = []
self.reward_buffer = []
self.action_buffer = []
# create and initialize variables
self.create_variables()
var_lists = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
self.session.run(tf.variables_initializer(var_lists))
# make sure all variables are initialized
self.session.run(tf.assert_variables_initialized())
if self.summary_writer is not None:
    # the graph was not available when the summary writer was created
self.summary_writer.add_graph(self.session.graph)
self.summary_every = summary_every
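# --- Aside on tf.assert_variables_initialized(), the function these snippets
# showcase: it returns an op that fails at run time if any listed variable
# (or, with no argument, any variable at all) is uninitialized, so running it
# right after tf.variables_initializer is a cheap sanity check. A related TF1
# idiom, sketched assuming a live `sess`:
uninitialized = sess.run(tf.report_uninitialized_variables())
assert uninitialized.size == 0, uninitialized  # empty array means all are set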
def __init__(self, session,
optimizer,
policy_network,
state_dim,
num_actions,
init_exp=0.5, # initial exploration prob
final_exp=0.0, # final exploration prob
anneal_steps=10000, # N steps for annealing exploration
discount_factor=0.99, # discount future rewards
reg_param=0.001, # regularization constants
max_gradient=5, # max gradient norms
summary_writer=None,
summary_every=100):
# tensorflow machinery
self.session = session
self.optimizer = optimizer
self.summary_writer = summary_writer
# model components
self.policy_network = policy_network
# training parameters
self.state_dim = state_dim
self.num_actions = num_actions
self.discount_factor = discount_factor
self.max_gradient = max_gradient
self.reg_param = reg_param
# exploration parameters
self.exploration = init_exp
self.init_exp = init_exp
self.final_exp = final_exp
self.anneal_steps = anneal_steps
# counters
self.train_iteration = 0
# rollout buffer
self.state_buffer = []
self.reward_buffer = []
self.action_buffer = []
# record reward history for normalization
self.all_rewards = []
self.max_reward_length = 1000000
# create and initialize variables
self.create_variables()
var_lists = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
self.session.run(tf.variables_initializer(var_lists))
# make sure all variables are initialized
self.session.run(tf.assert_variables_initialized())
if self.summary_writer is not None:
    # the graph was not available when the summary writer was created
self.summary_writer.add_graph(self.session.graph)
self.summary_every = summary_every
def __init__(self, session,
optimizer,
actor_network,
critic_network,
state_dim,
action_dim,
batch_size=32,
replay_buffer_size=1000000, # size of replay buffer
store_replay_every=1, # how frequent to store experience
discount_factor=0.99, # discount future rewards
target_update_rate=0.01,
reg_param=0.01, # regularization constants
max_gradient=5, # max gradient norms
noise_sigma=0.20,
noise_theta=0.15,
summary_writer=None,
summary_every=100):
# tensorflow machinery
self.session = session
self.optimizer = optimizer
self.summary_writer = summary_writer
# model components
self.actor_network = actor_network
self.critic_network = critic_network
self.replay_buffer = ReplayBuffer(buffer_size=replay_buffer_size)
# training parameters
self.batch_size = batch_size
self.state_dim = state_dim
self.action_dim = action_dim
self.discount_factor = discount_factor
self.target_update_rate = target_update_rate
self.max_gradient = max_gradient
self.reg_param = reg_param
# Ornstein-Uhlenbeck noise for exploration
self.noise_var = tf.Variable(tf.zeros([1, action_dim]))
noise_random = tf.random_normal([1, action_dim], stddev=noise_sigma)
    self.noise = self.noise_var.assign_sub(noise_theta * self.noise_var - noise_random)
# counters
self.store_replay_every = store_replay_every
self.store_experience_cnt = 0
self.train_iteration = 0
# create and initialize variables
self.create_variables()
var_lists = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
self.session.run(tf.variables_initializer(var_lists))
# make sure all variables are initialized
self.session.run(tf.assert_variables_initialized())
if self.summary_writer is not None:
    # the graph was not available when the summary writer was created
self.summary_writer.add_graph(self.session.graph)
self.summary_every = summary_every
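# --- The noise lines in the constructor above implement one Euler step of a
# zero-mean Ornstein-Uhlenbeck process: assign_sub(theta*x - eps) computes
# x <- x - theta*x + eps. A NumPy sketch of the same update, for reference:
import numpy as np

def ou_step(x, theta=0.15, sigma=0.20):
    # x <- x - theta*(x - mu) + sigma*N(0,1), with mu = 0 and dt = 1
    return x - theta * x + sigma * np.random.randn(*x.shape)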
def __init__(self, session,
optimizer,
q_network,
state_dim,
num_actions,
batch_size=32,
init_exp=0.5, # initial exploration prob
final_exp=0.1, # final exploration prob
anneal_steps=10000, # N steps for annealing exploration
replay_buffer_size=10000,
store_replay_every=5, # how frequent to store experience
discount_factor=0.9, # discount future rewards
target_update_rate=0.01,
reg_param=0.01, # regularization constants
max_gradient=5, # max gradient norms
double_q_learning=False,
summary_writer=None,
summary_every=100):
# tensorflow machinery
self.session = session
self.optimizer = optimizer
self.summary_writer = summary_writer
# model components
self.q_network = q_network
self.replay_buffer = ReplayBuffer(buffer_size=replay_buffer_size)
# Q learning parameters
self.batch_size = batch_size
self.state_dim = state_dim
self.num_actions = num_actions
self.exploration = init_exp
self.init_exp = init_exp
self.final_exp = final_exp
self.anneal_steps = anneal_steps
self.discount_factor = discount_factor
self.target_update_rate = target_update_rate
self.double_q_learning = double_q_learning
# training parameters
self.max_gradient = max_gradient
self.reg_param = reg_param
# counters
self.store_replay_every = store_replay_every
self.store_experience_cnt = 0
self.train_iteration = 0
# create and initialize variables
self.create_variables()
var_lists = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
self.session.run(tf.variables_initializer(var_lists))
# make sure all variables are initialized
self.session.run(tf.assert_variables_initialized())
if self.summary_writer is not None:
    # the graph was not available when the summary writer was created
self.summary_writer.add_graph(self.session.graph)
self.summary_every = summary_every