def variable_scope(self):
"""Returns the variable_scope declared by the module.
It is valid for library users to access the internal templated
variable_scope, but only makes sense to do so after connection. Therefore we
raise an error here if the variable_scope is requested before connection.
The only case where it does make sense to access the variable_scope before
connection is to get the post-uniquification name, which we support using
the separate .name property.
Returns:
variable_scope: `tf.VariableScope` instance of the internal `tf.Template`.
Raises:
NotConnectedError: If the module is not connected to the Graph.
"""
self._ensure_is_connected()
return self._template.variable_scope
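# A minimal usage sketch for the property above, assuming a Sonnet 1.x module
# such as snt.Linear: the variable_scope is only available after the module has
# been connected to the graph at least once.
import sonnet as snt
import tensorflow as tf

linear = snt.Linear(output_size=8)
inputs = tf.placeholder(tf.float32, shape=[None, 4])
outputs = linear(inputs)            # connection happens here
print(linear.variable_scope.name)   # e.g. "linear"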
def get_variable_scope_name(value):
"""Returns the name of the variable scope indicated by the given value.
Args:
value: String, variable scope, or object with `variable_scope` attribute
(e.g., Sonnet module).
Returns:
The name (a string) of the corresponding variable scope.
Raises:
ValueError: If `value` does not identify a variable scope.
"""
# If the object has a "variable_scope" property, use it.
value = getattr(value, "variable_scope", value)
if isinstance(value, tf.VariableScope):
return value.name
elif isinstance(value, six.string_types):
return value
else:
raise ValueError("Not a variable scope: {}".format(value))
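# Usage sketch for get_variable_scope_name: it accepts a plain string, a
# tf.VariableScope, or any object exposing a `variable_scope` attribute.
import tensorflow as tf

with tf.variable_scope('encoder') as vs:
    pass
print(get_variable_scope_name('encoder'))  # 'encoder'
print(get_variable_scope_name(vs))         # 'encoder'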
def get_variables_in_scope(scope, collection=tf.GraphKeys.TRAINABLE_VARIABLES):
"""Returns a tuple `tf.Variable`s in a scope for a given collection.
Args:
scope: `tf.VariableScope` or string to retrieve variables from.
collection: Collection to restrict query to. By default this is
`tf.GraphKeys.TRAINABLE_VARIABLES`, which doesn't include non-trainable
variables such as moving averages.
Returns:
A tuple of `tf.Variable` objects.
"""
scope_name = get_variable_scope_name(scope)
# Escape the name in case it contains any "." characters. Add a closing slash
# so we will not search any scopes that have this scope name as a prefix.
scope_name = re.escape(scope_name) + "/"
return tuple(tf.get_collection(collection, scope_name))
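# Usage sketch for get_variables_in_scope: only variables created under the
# requested scope are returned; the trailing "/" keeps a scope such as
# "conv_extra" from matching a query for "conv".
import tensorflow as tf

with tf.variable_scope('conv'):
    _ = tf.get_variable('w', shape=[2, 2])
with tf.variable_scope('conv_extra'):
    _ = tf.get_variable('w', shape=[2, 2])
print([v.name for v in get_variables_in_scope('conv')])  # ['conv/w:0']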
def _set_scope(self, scope):
"""Set the given scope as the scope of the layer.
If not already present, set the scope for the layer. The name of such scope
will be accessible through the `self.scope` property.
Arguments:
scope: the given scope, of type `str` or `tf.VariableScope`. If `None`,
the one returned from the `self._default_scope()` method will be used.
"""
if self._scope is None:
if self._reuse:
self._scope = next(tf.variable_scope( # pylint: disable=I0011,E1101
scope if scope is not None else self._default_scope()).gen)
else:
self._scope = next(tf.variable_scope( # pylint: disable=I0011,E1101
scope, default_name=self._default_scope().name).gen)
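# A more conventional sketch of the same scope resolution, without reaching into
# the context manager's .gen attribute; `default_name` here is a stand-in value.
import tensorflow as tf

def resolve_scope(scope, default_name='Layer', reuse=None):
    # reuse should be True, tf.AUTO_REUSE, or None in TF 1.x.
    with tf.variable_scope(scope, default_name=default_name, reuse=reuse) as vs:
        return vs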
def __call__(self, values, weights=None, scope=None):
"""Computes the streaming average.
This method builds the fragment of the computational graph that computes the streaming
average, returning a variable representing the actual streaming average value and
an `Op` to update such value.
Arguments:
values: a `Tensor` of arbitrary dimensions.
weights: optional `Tensor` whose rank is either `0`, or the same rank
as values, and must be broadcastable to values (i.e., all dimensions must
be either `1`, or the same as the corresponding values dimension). It contains
the weights for summing up all the elements in `values`.
scope: a `str` or a `tf.VariableScope` used for building the fragment
of the computational graph that computes the streaming average.
Returns:
mean: a `Tensor` representing the current mean, which is a reference
to `self.value`.
update_op: an `Op` that updates the streaming value, which is a reference
to `self.update_op`.
"""
self.compute(values, weights=weights, scope=scope)
return self.value, self.update_op
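# The (value, update_op) contract described above matches TF's built-in
# streaming mean; a minimal sketch with tf.metrics.mean (TF 1.x):
import tensorflow as tf

values = tf.placeholder(tf.float32, shape=[None])
mean_t, update_op = tf.metrics.mean(values)
with tf.Session() as sess:
    sess.run(tf.local_variables_initializer())  # metric accumulators are local vars
    sess.run(update_op, feed_dict={values: [1., 2., 3.]})
    print(sess.run(mean_t))                     # 2.0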
def get_variables_in_scope(scope, collection=tf.GraphKeys.TRAINABLE_VARIABLES):
"""Returns a tuple `tf.Variable`s in a scope for a given collection.
Args:
scope: `tf.VariableScope` instance to retrieve variables from.
collection: Collection to restrict query to. By default this is
`tf.GraphKeys.TRAINABLE_VARIABLES`, which doesn't include non-trainable
variables such as moving averages.
Returns:
A tuple of `tf.Variable` objects.
"""
# Escape the name in case it contains any "." characters. Add a closing slash
# so we will not search any scopes that have this scope name as a prefix.
scope_name = re.escape(scope.name) + "/"
return tuple(tf.get_collection(collection, scope_name))
def __init__(self, input_type=None, output_type=None, name_or_scope=None):
"""Creates the layer.
Args:
input_type: A type.
output_type: A type.
name_or_scope: A string or variable scope. If a string, a new variable
scope will be created by calling
[`create_variable_scope`](#create_variable_scope), with defaults
inherited from the current variable scope. If no caching device is set,
it will be set to `lambda op: op.device`. This is because `tf.while` can
be very inefficient if the variables it uses are not cached locally.
"""
if name_or_scope is None: name_or_scope = type(self).__name__
if isinstance(name_or_scope, tf.VariableScope):
self._vscope = name_or_scope
name = str(self._vscope.name)
elif isinstance(name_or_scope, six.string_types):
self._vscope = create_variable_scope(name_or_scope)
name = name_or_scope
else:
raise TypeError('name_or_scope must be a tf.VariableScope or a string: '
'%s' % (name_or_scope,))
if self._vscope.caching_device is None:
self._vscope.set_caching_device(lambda op: op.device)
super(Layer, self).__init__(input_type, output_type, name)
if not hasattr(self, '_constructor_name'):
self._constructor_name = '__.%s' % self.__class__.__name__
if not hasattr(self, '_constructor_args'):
self._constructor_args = None
if not hasattr(self, '_constructor_kwargs'):
self._constructor_kwargs = None
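# Sketch of the caching behaviour mentioned in the docstring (plain TF 1.x):
# setting a caching_device on a scope lets reads inside tf.while_loop reuse a
# locally cached copy instead of re-fetching the variable on every iteration.
import tensorflow as tf

with tf.variable_scope('cached_layer', caching_device=lambda op: op.device) as vs:
    _ = tf.get_variable('w', shape=[4, 4])
print(vs.caching_device is not None)  # True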
def __init__(self, states, inner_size, trainable=True, scope=None):
"""Initiailzes a new instance of the BahdanauAttention class.
The attention mechanism implemented in this class is the one
described by Bahdanau et al. here: https://arxiv.org/abs/1409.0473.
The attention states and the query are projected to the attention
inner size, then summed together and processed with a tanh and
finally dot-producted with an attention vector. All the operations
are performed at a reference size, the attention size, which
must be set during the initialization phase (with the `inner_size` argument).
Arguments:
states: 3-D Tensor of shape [batch, timesteps, state] representing the
states on which the attention scores will be computed; the third dimension
of the tensor must be statically determined.
inner_size: int representing the inner attention size;
trainable: if True, variables will be trainable;
scope: None, str or tf.VariableScope representing the variable scope
of the layer which will be used to create all the needed variables.
Raises:
ValueError: if the last dimension of the `states` argument is not
statically determined.
"""
super(BahdanauAttention, self).__init__(trainable=trainable, scope=scope)
self._states = states
self._size = inner_size
self._memory = None
self._vector = None
self._var_op_names = set()
# check that the last dimension of the `states`
# variable is fully defined.
state_size = states.get_shape()[-1].value
if state_size is None:
raise ValueError('Last dimension of `states` must be defined, found %s'
% str(states.get_shape()))
self._state_size = state_size
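# A minimal sketch of the scoring described in the docstring (assumed names and
# shapes, not this class's actual implementation): states [B, T, S] and a query
# [B, Q] are projected to the inner size, summed, squashed with tanh, and
# dot-producted with an attention vector.
import tensorflow as tf

def bahdanau_scores(states, query, inner_size):
    memory = tf.layers.dense(states, inner_size, use_bias=False)     # [B, T, A]
    query_proj = tf.layers.dense(query, inner_size, use_bias=False)  # [B, A]
    vector = tf.get_variable('attention_vector', shape=[inner_size])
    activations = tf.tanh(memory + tf.expand_dims(query_proj, 1))    # [B, T, A]
    return tf.reduce_sum(activations * vector, axis=2)               # [B, T]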
def __init__(self, shortlist_size, decoder_out_size, state_size,
trainable=True, scope='PointingSoftmaxOutput'):
"""Initializes a new instance.
Arguments:
shortlist_size: an `int` representing the dimension of the known output vocabulary.
decoder_out_size: an `int` representing the output size of the decoder.
state_size: an `int` representing the size of the attention states.
trainable: if `True`, the created variables will be trainable.
scope: `VariableScope` for the created subgraph.
"""
super(PointingSoftmaxOutput, self).__init__(trainable=trainable, scope=scope)
self._shortlist_size = shortlist_size
self._decoder_out_size = decoder_out_size
self._state_size = state_size
def as_scope(scope):
"""Get the proper variable scope.
Given an object that can represent a `tf.VariableScope`,
namely a `str` or a `tf.VariableScope`, performs type checking
and returns a proper `tf.VariableScope` object. Such a function is
handy when a function accepts an argument serving as a variable
scope but doesn't know its proper type.
Arguments:
scope: a `str` or a `tf.VariableScope` representing a variable scope.
Returns:
a `tf.VariableScope` instance.
Raises:
ValueError: if `scope` is `None`.
TypeError: if `scope` is neither `str` nor `tf.VariableScope`.
Example:
```python
from dket import utils
def do_something(scope):
scope = utils.as_scope(scope or 'DefaultScope')
with tf.variable_scope(scope) as scope:
# do something
pass
"""
if scope is None:
raise ValueError('Cannot create a scope from a None.')
if isinstance(scope, str):
return next(tf.variable_scope(scope).gen) # pylint: disable=I0011,E1101
if isinstance(scope, tf.VariableScope):
return scope
raise TypeError('`scope` argument must be of type `str` or '
'`tf.VariableScope`, while %s was found.'
% str(type(scope)))
def get_variables(prefix=None):
"""Get variables by their name prefix.
Arguments:
prefix: a `str` or a `tf.VariableScope` instance.
Returns:
a list of `tf.Variable` with their name starting with the
given prefix, i.e. all those variables under the scope
specified by the prefix.
"""
prefix = prefix or tf.get_variable_scope().name
return [var for var in tf.global_variables()
if var.name.startswith(prefix)]
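# Usage sketch for get_variables: note that the match is a plain name prefix,
# so "rnn" would also pick up variables under e.g. "rnn_extra".
import tensorflow as tf

with tf.variable_scope('rnn'):
    _ = tf.get_variable('w', shape=[3, 3])
print([v.name for v in get_variables('rnn')])  # ['rnn/w:0']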
def __init__(self, subnet, name=None, scope=None):
"""Create the Shared operator.
Use this as:
f = Shared(Cr(100, 3))
g = f | f | f
Ordinarily, you do not need to provide either a name or a scope.
Providing a name is useful if you want a well-defined namespace
for the variables (e.g., for saving a subnet).
Args:
subnet: Definition of the shared network.
name: Optional name for the shared context.
scope: Optional shared scope (must be a Scope, not a string).
Raises:
ValueError: Scope is not of type tf.VariableScope, name is not
of type string, or both scope and name are given together.
"""
if scope is not None and not isinstance(scope, tf.VariableScope):
raise ValueError("scope must be None or a VariableScope")
if name is not None and not isinstance(name, str):
raise ValueError("name must be None or a string")
if scope is not None and name is not None:
raise ValueError("cannot provide both a name and a scope")
if name is None:
name = "Shared_%d" % Shared.shared_number
Shared.shared_number += 1
self.subnet = subnet
self.name = name
self.scope = scope
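# Sketch of the sharing mechanism behind Shared in plain TF 1.x (not the specs
# DSL itself): entering the same variable scope with reuse=True makes the second
# call use the weights created by the first.
import tensorflow as tf

x1 = tf.placeholder(tf.float32, shape=[None, 16])
x2 = tf.placeholder(tf.float32, shape=[None, 16])
with tf.variable_scope('Shared_0') as vs:
    y1 = tf.layers.dense(x1, 100, name='fc')
with tf.variable_scope(vs, reuse=True):
    y2 = tf.layers.dense(x2, 100, name='fc')  # same kernel/bias as y1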
def build_graph(self, graph):
np.random.seed(self.random_seed)
with graph.as_default():
tf.set_random_seed(self.random_seed)
# Dims: bs x num_steps x state_size
self.inputs = tf.placeholder(tf.float32, shape=[None, None, self.policy_params['nb_inputs']], name='inputs')
input_shape = tf.shape(self.inputs)
dynamic_batch_size, dynamic_num_steps = input_shape[0], input_shape[1]
policy_scope = tf.VariableScope(reuse=False, name='Policy')
with tf.variable_scope(policy_scope):
policy_inputs = tf.reshape(self.inputs, [-1, self.policy_params['nb_inputs']])
probs, actions = capacities.policy(self.policy_params, policy_inputs)
self.probs = tf.reshape(probs, [dynamic_batch_size, dynamic_num_steps, self.policy_params['nb_outputs']])
self.actions = tf.reshape(actions, [dynamic_batch_size, dynamic_num_steps, 1])
self.action_t = self.actions[0, 0, 0]
with tf.variable_scope('Training'):
self.rewards = tf.placeholder(tf.float32, shape=[None, None, 1], name="reward")
self.mask_plh = tf.placeholder(tf.float32, shape=[None, None, 1], name="mask_plh")
baseline = tf.reduce_mean(self.rewards)
batch_size, num_steps = tf.shape(self.actions)[0], tf.shape(self.actions)[1]
line_indices = tf.matmul( # Line indices
tf.reshape(tf.range(0, batch_size), [-1, 1])
, tf.ones([1, num_steps], dtype=tf.int32)
)
column_indices = tf.matmul( # Column indices
tf.ones([batch_size, 1], dtype=tf.int32)
, tf.reshape(tf.range(0, num_steps), [1, -1])
)
depth_indices = tf.cast(tf.squeeze(self.actions, 2), tf.int32)
stacked_actions = tf.stack(
[line_indices, column_indices, depth_indices], 2
)
log_probs = tf.expand_dims(tf.log(tf.gather_nd(self.probs, stacked_actions)), 2)
# We want to average on sequence
self.loss = tf.reduce_mean( - tf.reduce_sum((log_probs * (self.rewards - baseline)) * self.mask_plh, 1))
adam = tf.train.AdamOptimizer(self.lr)
self.global_step = tf.Variable(0, trainable=False, name="global_step", collections=[tf.GraphKeys.GLOBAL_STEP, tf.GraphKeys.GLOBAL_VARIABLES])
self.train_op = adam.minimize(self.loss, global_step=self.global_step)
self.score_plh = tf.placeholder(tf.float32, shape=[])
self.score_sum_t = tf.summary.scalar('av_score', self.score_plh)
self.loss_plh = tf.placeholder(tf.float32, shape=[])
self.loss_sum_t = tf.summary.scalar('loss', self.loss_plh)
self.all_summary_t = tf.summary.merge_all()
self.episode_id, self.inc_ep_id_op = capacities.counter("episode_id")
# Playing part
self.pscore_plh = tf.placeholder(tf.float32, shape=[])
self.pscore_sum_t = tf.summary.scalar('play_score', self.pscore_plh)
return graph
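# Sketch of what the matmul-based index construction above computes, written
# with tf.tile for clarity: gather, for every (batch, step) position, the
# probability of the action actually taken. Assumed shapes: probs [B, T, A],
# actions [B, T, 1].
import tensorflow as tf

probs = tf.placeholder(tf.float32, shape=[None, None, 4])
actions = tf.placeholder(tf.int32, shape=[None, None, 1])
b, t = tf.shape(probs)[0], tf.shape(probs)[1]
line = tf.tile(tf.reshape(tf.range(b), [-1, 1]), [1, t])     # [B, T] row index
col = tf.tile(tf.reshape(tf.range(t), [1, -1]), [b, 1])      # [B, T] step index
idx = tf.stack([line, col, tf.squeeze(actions, 2)], axis=2)  # [B, T, 3]
picked_probs = tf.gather_nd(probs, idx)                      # [B, T]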
def build_graph(self, graph):
with graph.as_default():
tf.set_random_seed(self.random_seed)
self.inputs_plh = tf.placeholder(tf.int32, shape=[None], name="inputs_plh")
q_scope = tf.VariableScope(reuse=False, name='QValues')
with tf.variable_scope(q_scope):
self.Qs = tf.get_variable('Qs'
, shape=[self.nb_state, self.action_space.n]
, initializer=tf.constant_initializer(self.initial_q_value)
, dtype=tf.float32
)
tf.summary.histogram('Qarray', self.Qs)
self.q_preds_t = tf.gather(self.Qs, self.inputs_plh)
policy_scope = tf.VariableScope(reuse=False, name='Policy')
with tf.variable_scope(policy_scope):
self.actions_t, self.probs_t = capacities.tabular_eps_greedy(
self.inputs_plh, self.q_preds_t, self.nb_state, self.env.action_space.n, self.N0, self.min_eps
)
self.action_t = self.actions_t[0]
self.q_value_t = self.q_preds_t[0][self.action_t]
learning_scope = tf.VariableScope(reuse=False, name='TDLearning')
with tf.variable_scope(learning_scope):
self.rewards_plh = tf.placeholder(tf.float32, shape=[None], name="rewards_plh")
self.targets_plh = tf.placeholder(tf.float32, shape=[None], name="targets_plh")
self.loss, self.train_op = capacities.tabular_learning_with_lr(
self.lr, self.lr_decay_steps, self.Qs, self.inputs_plh, self.actions_t, self.targets_plh
)
self.score_plh = tf.placeholder(tf.float32, shape=[])
self.score_sum_t = tf.summary.scalar('score', self.score_plh)
self.loss_plh = tf.placeholder(tf.float32, shape=[])
self.loss_sum_t = tf.summary.scalar('loss', self.loss_plh)
self.all_summary_t = tf.summary.merge_all()
self.episode_id, self.inc_ep_id_op = capacities.counter("episode_id")
# Playing part
self.pscore_plh = tf.placeholder(tf.float32, shape=[])
self.pscore_sum_t = tf.summary.scalar('play_score', self.pscore_plh)
return graph
def build_graph(self, graph):
with graph.as_default():
tf.set_random_seed(self.random_seed)
self.inputs_plh = tf.placeholder(tf.int32, shape=[None], name="inputs_plh")
q_scope = tf.VariableScope(reuse=False, name='QValues')
with tf.variable_scope(q_scope):
self.Qs = tf.get_variable('Qs'
, shape=[self.nb_state, self.action_space.n]
, initializer=tf.constant_initializer(self.initial_q_value)
, dtype=tf.float32
)
tf.summary.histogram('Qarray', self.Qs)
self.q_preds_t = tf.gather(self.Qs, self.inputs_plh)
fixed_q_scope = tf.VariableScope(reuse=False, name='FixedQValues')
with tf.variable_scope(fixed_q_scope):
self.update_fixed_vars_op = capacities.fix_scope(q_scope)
policy_scope = tf.VariableScope(reuse=False, name='Policy')
with tf.variable_scope(policy_scope):
if 'UCB' in self.config and self.config['UCB']:
self.actions_t, self.probs_t = capacities.tabular_UCB(
self.Qs, self.inputs_plh
)
else:
self.actions_t, self.probs_t = capacities.tabular_eps_greedy(
self.inputs_plh, self.q_preds_t, self.nb_state, self.env.action_space.n, self.N0, self.min_eps
)
self.action_t = self.actions_t[0]
self.q_value_t = self.q_preds_t[0][self.action_t]
# Experienced replay part
with tf.variable_scope('Learning'):
with tf.variable_scope(fixed_q_scope, reuse=True):
fixed_Qs = tf.get_variable('Qs')
self.rewards_plh = tf.placeholder(tf.float32, shape=[None], name="rewards_plh")
self.next_states_plh = tf.placeholder(tf.int32, shape=[None], name="next_states_plh")
# Note that we use the fixed Qs to create the targets
self.targets_t = capacities.get_q_learning_target(fixed_Qs, self.rewards_plh, self.next_states_plh, self.discount)
self.loss, self.train_op = capacities.tabular_learning_with_lr(
self.lr, self.lr_decay_steps, self.Qs, self.inputs_plh, self.actions_t, self.targets_t
)
self.score_plh = tf.placeholder(tf.float32, shape=[])
self.score_sum_t = tf.summary.scalar('score', self.score_plh)
self.loss_plh = tf.placeholder(tf.float32, shape=[])
self.loss_sum_t = tf.summary.scalar('loss', self.loss_plh)
self.all_summary_t = tf.summary.merge_all()
self.episode_id, self.inc_ep_id_op = capacities.counter("episode_id")
self.event_count, self.inc_event_count_op = capacities.counter("event_count")
# Playing part
self.pscore_plh = tf.placeholder(tf.float32, shape=[])
self.pscore_sum_t = tf.summary.scalar('play_score', self.pscore_plh)
return graph
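# Hypothetical sketch of what a fix_scope-style helper does, based on how it is
# used above: snapshot the live Q variables into a frozen copy so that learning
# targets are computed from a fixed estimate. The helper name and signature here
# are assumptions, not the actual capacities API.
import re
import tensorflow as tf

def make_update_fixed_op(live_scope_name, fixed_scope_name):
    live_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                                  re.escape(live_scope_name) + '/')
    assigns = []
    for var in live_vars:
        suffix = var.op.name[len(live_scope_name) + 1:]
        with tf.variable_scope(fixed_scope_name):
            fixed = tf.get_variable(suffix, shape=var.get_shape(),
                                    dtype=var.dtype.base_dtype, trainable=False)
        assigns.append(tf.assign(fixed, var))
    return tf.group(*assigns)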
def build_graph(self, graph):
with graph.as_default():
tf.set_random_seed(self.random_seed)
self.inputs_plh = tf.placeholder(tf.int32, shape=[None], name="inputs_plh")
q_scope = tf.VariableScope(reuse=False, name='QValues')
with tf.variable_scope(q_scope):
self.Qs = tf.get_variable('Qs'
, shape=[self.nb_state, self.action_space.n]
, initializer=tf.constant_initializer(self.initial_q_value)
, dtype=tf.float32
)
tf.summary.histogram('Qarray', self.Qs)
self.q_preds_t = tf.gather(self.Qs, self.inputs_plh)
policy_scope = tf.VariableScope(reuse=False, name='Policy')
with tf.variable_scope(policy_scope):
if 'UCB' in self.config and self.config['UCB']:
self.actions_t, self.probs_t = capacities.tabular_UCB(
self.Qs, self.inputs_plh
)
else:
self.actions_t, self.probs_t = capacities.tabular_eps_greedy(
self.inputs_plh, self.q_preds_t, self.nb_state, self.env.action_space.n, self.N0, self.min_eps
)
self.action_t = self.actions_t[0]
self.q_value_t = self.q_preds_t[0][self.action_t]
learning_scope = tf.VariableScope(reuse=False, name='Learning')
with tf.variable_scope(learning_scope):
self.rewards_plh = tf.placeholder(tf.float32, shape=[None], name="rewards_plh")
self.next_states_plh = tf.placeholder(tf.int32, shape=[None], name="next_states_plh")
self.next_probs_plh = tf.placeholder(tf.float32, shape=[None, self.action_space.n], name="next_probs_plh")
self.targets_t = capacities.get_expected_sarsa_target(self.Qs, self.rewards_plh, self.next_states_plh, self.next_probs_plh, self.discount)
self.loss, self.train_op = capacities.tabular_learning_with_lr(
self.lr, self.lr_decay_steps, self.Qs, self.inputs_plh, self.actions_t, self.targets_t
)
self.score_plh = tf.placeholder(tf.float32, shape=[])
self.score_sum_t = tf.summary.scalar('score', self.score_plh)
self.loss_plh = tf.placeholder(tf.float32, shape=[])
self.loss_sum_t = tf.summary.scalar('loss', self.loss_plh)
self.all_summary_t = tf.summary.merge_all()
self.episode_id, self.inc_ep_id_op = capacities.counter("episode_id")
# Playing part
self.pscore_plh = tf.placeholder(tf.float32, shape=[])
self.pscore_sum_t = tf.summary.scalar('play_score', self.pscore_plh)
return graph
def build_graph(self, graph):
with graph.as_default():
tf.set_random_seed(self.random_seed)
self.N0_t = tf.constant(self.N0, tf.float32, name='N_0')
self.N = tf.Variable(0., dtype=tf.float32, name='N', trainable=False)
self.min_eps_t = tf.constant(self.min_eps, tf.float32, name='min_eps')
self.inputs = tf.placeholder(tf.float32, shape=[None, self.q_params['nb_inputs']], name='inputs')
q_scope = tf.VariableScope(reuse=False, name='QValues')
with tf.variable_scope(q_scope):
self.q_values = tf.squeeze(capacities.value_f(self.q_params, self.inputs))
self.action_t = capacities.eps_greedy(
self.inputs, self.q_values, self.env.action_space.n, self.N0, self.min_eps
)
self.q_t = self.q_values[self.action_t]
with tf.variable_scope('Training'):
self.reward = tf.placeholder(tf.float32, shape=[], name="reward")
self.next_state = tf.placeholder(tf.float32, shape=[1, self.q_params['nb_inputs']], name="nextState")
self.next_action = tf.placeholder(tf.int32, shape=[], name="nextAction")
with tf.variable_scope(q_scope, reuse=True):
next_q_values = tf.squeeze(capacities.value_f(self.q_params, self.next_state))
target_q1 = tf.stop_gradient(self.reward + self.discount * next_q_values[self.next_action])
target_q2 = self.reward
is_done = tf.cast(self.next_state[0, 4], tf.bool)
target_q = tf.where(is_done, target_q2, target_q1)
with tf.control_dependencies([target_q]):
self.loss = 1/2 * tf.square(target_q - self.q_t)
adam = tf.train.AdamOptimizer(self.lr)
self.global_step = tf.Variable(0, trainable=False, name="global_step", collections=[tf.GraphKeys.GLOBAL_STEP, tf.GraphKeys.GLOBAL_VARIABLES])
self.train_op = adam.minimize(self.loss, global_step=self.global_step)
self.score_plh = tf.placeholder(tf.float32, shape=[])
self.score_sum_t = tf.summary.scalar('score', self.score_plh)
self.loss_plh = tf.placeholder(tf.float32, shape=[])
self.loss_sum_t = tf.summary.scalar('loss', self.loss_plh)
self.all_summary_t = tf.summary.merge_all()
self.episode_id, self.inc_ep_id_op = capacities.counter("episode_id")
# Playing part
self.pscore_plh = tf.placeholder(tf.float32, shape=[])
self.pscore_sum_t = tf.summary.scalar('play_score', self.pscore_plh)
return graph
def build_graph(self, graph):
with graph.as_default():
tf.set_random_seed(self.random_seed)
self.inputs_plh = tf.placeholder(tf.int32, shape=[None], name="inputs_plh")
q_scope = tf.VariableScope(reuse=False, name='QValues')
with tf.variable_scope(q_scope):
self.Qs = tf.get_variable('Qs'
, shape=[self.nb_state, self.action_space.n]
, initializer=tf.constant_initializer(self.initial_q_value)
, dtype=tf.float32
)
tf.summary.histogram('Qarray', self.Qs)
self.q_preds_t = tf.gather(self.Qs, self.inputs_plh)
policy_scope = tf.VariableScope(reuse=False, name='Policy')
with tf.variable_scope(policy_scope):
if 'UCB' in self.config and self.config['UCB']:
self.actions_t, self.probs_t = capacities.tabular_UCB(
self.Qs, self.inputs_plh
)
else:
self.actions_t, self.probs_t = capacities.tabular_eps_greedy(
self.inputs_plh, self.q_preds_t, self.nb_state, self.env.action_space.n, self.N0, self.min_eps
)
self.action_t = self.actions_t[0]
self.q_value_t = self.q_preds_t[0][self.action_t]
learning_scope = tf.VariableScope(reuse=False, name='Learning')
with tf.variable_scope(learning_scope):
self.rewards_plh = tf.placeholder(tf.float32, shape=[None], name="rewards_plh")
self.next_states_plh = tf.placeholder(tf.int32, shape=[None], name="next_states_plh")
self.targets_t = capacities.get_q_learning_target(self.Qs, self.rewards_plh, self.next_states_plh, self.discount)
self.loss, self.train_op = capacities.tabular_learning_with_lr(
self.lr, self.lr_decay_steps, self.Qs, self.inputs_plh, self.actions_t, self.targets_t
)
self.score_plh = tf.placeholder(tf.float32, shape=[])
self.score_sum_t = tf.summary.scalar('score', self.score_plh)
self.loss_plh = tf.placeholder(tf.float32, shape=[])
self.loss_sum_t = tf.summary.scalar('loss', self.loss_plh)
self.all_summary_t = tf.summary.merge_all()
self.episode_id, self.inc_ep_id_op = capacities.counter("episode_id")
# Playing part
self.pscore_plh = tf.placeholder(tf.float32, shape=[])
self.pscore_sum_t = tf.summary.scalar('play_score', self.pscore_plh)
return graph
def build_graph(self, graph):
with graph.as_default():
tf.set_random_seed(self.random_seed)
self.inputs_plh = tf.placeholder(tf.int32, shape=[None], name="inputs_plh")
q_scope = tf.VariableScope(reuse=False, name='QValues')
with tf.variable_scope(q_scope):
self.Qs = tf.get_variable('Qs'
, shape=[self.nb_state, self.action_space.n]
, initializer=tf.constant_initializer(self.initial_q_value)
, dtype=tf.float32
)
tf.summary.histogram('Qarray', self.Qs)
self.q_preds_t = tf.gather(self.Qs, self.inputs_plh)
policy_scope = tf.VariableScope(reuse=False, name='Policy')
with tf.variable_scope(policy_scope):
if 'UCB' in self.config and self.config['UCB']:
self.actions_t, self.probs_t = capacities.tabular_UCB(
self.Qs, self.inputs_plh
)
else:
self.actions_t, self.probs_t = capacities.tabular_eps_greedy(
self.inputs_plh, self.q_preds_t, self.nb_state, self.env.action_space.n, self.N0, self.min_eps
)
self.action_t = self.actions_t[0]
self.q_value_t = self.q_preds_t[0][self.action_t]
learning_scope = tf.VariableScope(reuse=False, name='Learning')
with tf.variable_scope(learning_scope):
self.rewards_plh = tf.placeholder(tf.float32, shape=[None], name="rewards_plh")
self.targets_t = capacities.get_mc_target(self.rewards_plh, self.discount)
self.loss, self.train_op = capacities.tabular_learning(
self.Qs, self.inputs_plh, self.actions_t, self.targets_t
)
self.score_plh = tf.placeholder(tf.float32, shape=[])
self.score_sum_t = tf.summary.scalar('score', self.score_plh)
self.loss_plh = tf.placeholder(tf.float32, shape=[])
self.loss_sum_t = tf.summary.scalar('loss', self.loss_plh)
self.all_summary_t = tf.summary.merge_all()
self.episode_id, self.inc_ep_id_op = capacities.counter("episode_id")
# Playing part
self.pscore_plh = tf.placeholder(tf.float32, shape=[])
self.pscore_sum_t = tf.summary.scalar('play_score', self.pscore_plh)
return graph
def build_graph(self, graph):
with graph.as_default():
tf.set_random_seed(self.random_seed)
self.inputs_plh = tf.placeholder(tf.int32, shape=[None], name="inputs_plh")
q_scope = tf.VariableScope(reuse=False, name='QValues')
with tf.variable_scope(q_scope):
self.Qs = tf.get_variable('Qs'
, shape=[self.nb_state, self.action_space.n]
, initializer=tf.constant_initializer(self.initial_q_value)
, dtype=tf.float32
)
tf.summary.histogram('Qarray', self.Qs)
self.q_preds_t = tf.gather(self.Qs, self.inputs_plh)
policy_scope = tf.VariableScope(reuse=False, name='Policy')
with tf.variable_scope(policy_scope):
if 'UCB' in self.config and self.config['UCB']:
self.actions_t, self.probs_t = capacities.tabular_UCB(
self.Qs, self.inputs_plh
)
else:
self.actions_t, self.probs_t = capacities.tabular_eps_greedy(
self.inputs_plh, self.q_preds_t, self.nb_state, self.env.action_space.n, self.N0, self.min_eps
)
self.action_t = self.actions_t[0]
self.q_value_t = self.q_preds_t[0, self.action_t]
learning_scope = tf.VariableScope(reuse=False, name='Learning')
with tf.variable_scope(learning_scope):
self.targets_t = tf.placeholder(tf.float32, shape=[None], name="targets_t")
self.loss, self.train_op = capacities.tabular_learning_with_lr(
self.lr, self.lr_decay_steps, self.Qs, self.inputs_plh, self.actions_t, self.targets_t
)
self.score_plh = tf.placeholder(tf.float32, shape=[])
self.score_sum_t = tf.summary.scalar('score', self.score_plh)
self.loss_plh = tf.placeholder(tf.float32, shape=[])
self.loss_sum_t = tf.summary.scalar('loss', self.loss_plh)
self.all_summary_t = tf.summary.merge_all()
self.episode_id, self.inc_ep_id_op = capacities.counter("episode_id")
# Playing part
self.pscore_plh = tf.placeholder(tf.float32, shape=[])
self.pscore_sum_t = tf.summary.scalar('play_score', self.pscore_plh)
return graph
def build_graph(self, graph):
with graph.as_default():
tf.set_random_seed(self.random_seed)
self.inputs_plh = tf.placeholder(tf.int32, shape=[None], name="inputs_plh")
q_scope = tf.VariableScope(reuse=False, name='QValues')
with tf.variable_scope(q_scope):
self.Qs = tf.get_variable('Qs'
, shape=[self.nb_state, self.action_space.n]
, initializer=tf.constant_initializer(self.initial_q_value)
, dtype=tf.float32
)
tf.summary.histogram('Qarray', self.Qs)
self.q_preds_t = tf.gather(self.Qs, self.inputs_plh)
policy_scope = tf.VariableScope(reuse=False, name='Policy')
with tf.variable_scope(policy_scope):
if 'UCB' in self.config and self.config['UCB']:
self.actions_t, self.probs_t = capacities.tabular_UCB(
self.Qs, self.inputs_plh
)
else:
self.actions_t, self.probs_t = capacities.tabular_eps_greedy(
self.inputs_plh, self.q_preds_t, self.nb_state, self.env.action_space.n, self.N0, self.min_eps
)
self.action_t = self.actions_t[0]
self.q_value_t = self.q_preds_t[0][self.action_t]
self.episode_id, self.inc_ep_id_op = capacities.counter("episode_id")
learning_scope = tf.VariableScope(reuse=False, name='Learning')
with tf.variable_scope(learning_scope):
self.rewards_plh = tf.placeholder(tf.float32, shape=[None], name="rewards_plh")
self.next_states_plh = tf.placeholder(tf.int32, shape=[None], name="next_states_plh")
self.next_actions_plh = tf.placeholder(tf.int32, shape=[None], name="next_actions_plh")
self.next_probs_plh = tf.placeholder(tf.float32, shape=[None, self.action_space.n], name="next_probs_plh")
sigma = tf.train.inverse_time_decay(tf.constant(1., dtype=tf.float32), self.episode_id, decay_steps=100, decay_rate=0.1)
tf.summary.scalar('sigma', sigma)
self.targets_t = capacities.get_sigma_target(self.Qs, sigma, self.rewards_plh, self.next_states_plh, self.next_actions_plh, self.next_probs_plh, self.discount)
self.loss, self.train_op = capacities.tabular_learning_with_lr(
self.lr, self.lr_decay_steps, self.Qs, self.inputs_plh, self.actions_t, self.targets_t
)
self.score_plh = tf.placeholder(tf.float32, shape=[])
self.score_sum_t = tf.summary.scalar('score', self.score_plh)
self.loss_plh = tf.placeholder(tf.float32, shape=[])
self.loss_sum_t = tf.summary.scalar('loss', self.loss_plh)
self.all_summary_t = tf.summary.merge_all()
# Playing part
self.pscore_plh = tf.placeholder(tf.float32, shape=[])
self.pscore_sum_t = tf.summary.scalar('play_score', self.pscore_plh)
return graph