def variable_scope(self):
"""Returns the variable_scope declared by the module.
It is valid for library users to access the internal templated
variable_scope, but only makes sense to do so after connection. Therefore we
raise an error here if the variable_scope is requested before connection.
The only case where it does make sense to access the variable_scope before
connection is to get the post-uniquification name, which we support using
the separate .name property.
Returns:
variable_scope: `tf.VariableScope` instance of the internal `tf.Template`.
Raises:
NotConnectedError: If the module is not connected to the Graph.
"""
self._ensure_is_connected()
return self._template.variable_scope
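# A minimal usage sketch for the property above, assuming a Sonnet 1.x module
# such as snt.Linear: the variable_scope is only available after the module has
# been connected to the graph at least once.
import sonnet as snt
import tensorflow as tf

linear = snt.Linear(output_size=8)
inputs = tf.placeholder(tf.float32, shape=[None, 4])
outputs = linear(inputs)            # connection happens here
print(linear.variable_scope.name)   # e.g. "linear"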
def get_variable_scope_name(value):
"""Returns the name of the variable scope indicated by the given value.
Args:
value: String, variable scope, or object with `variable_scope` attribute
(e.g., Sonnet module).
Returns:
The name (a string) of the corresponding variable scope.
Raises:
ValueError: If `value` does not identify a variable scope.
"""
# If the object has a "variable_scope" property, use it.
value = getattr(value, "variable_scope", value)
if isinstance(value, tf.VariableScope):
return value.name
elif isinstance(value, six.string_types):
return value
else:
raise ValueError("Not a variable scope: {}".format(value))
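# Usage sketch for get_variable_scope_name: it accepts a plain string, a
# tf.VariableScope, or any object exposing a `variable_scope` attribute.
import tensorflow as tf

with tf.variable_scope('encoder') as vs:
    pass
print(get_variable_scope_name('encoder'))  # 'encoder'
print(get_variable_scope_name(vs))         # 'encoder'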
def get_variables_in_scope(scope, collection=tf.GraphKeys.TRAINABLE_VARIABLES):
"""Returns a tuple `tf.Variable`s in a scope for a given collection.
Args:
scope: `tf.VariableScope` or string to retrieve variables from.
collection: Collection to restrict query to. By default this is
`tf.GraphKeys.TRAINABLE_VARIABLES`, which doesn't include non-trainable
variables such as moving averages.
Returns:
A tuple of `tf.Variable` objects.
"""
scope_name = get_variable_scope_name(scope)
# Escape the name in case it contains any "." characters. Add a closing slash
# so we will not search any scopes that have this scope name as a prefix.
scope_name = re.escape(scope_name) + "/"
return tuple(tf.get_collection(collection, scope_name))
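# Usage sketch for get_variables_in_scope: only variables created under the
# requested scope are returned; the trailing "/" keeps a scope such as
# "conv_extra" from matching a query for "conv".
import tensorflow as tf

with tf.variable_scope('conv'):
    _ = tf.get_variable('w', shape=[2, 2])
with tf.variable_scope('conv_extra'):
    _ = tf.get_variable('w', shape=[2, 2])
print([v.name for v in get_variables_in_scope('conv')])  # ['conv/w:0']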
def _set_scope(self, scope):
"""Set the given scope as the scope of the layer.
If not already present, set the scope for the layer. The name of such scope
will be accessible through the `self.scope` property.
Arguments:
scope: the given scope, of type `str` or `tf.VariableScope`. If `None`,
the one returned from the `self._default_scope()` method will be used.
"""
if self._scope is None:
if self._reuse:
self._scope = next(tf.variable_scope( # pylint: disable=I0011,E1101
scope if scope is not None else self._default_scope()).gen)
else:
self._scope = next(tf.variable_scope( # pylint: disable=I0011,E1101
scope, default_name=self._default_scope().name).gen)
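# A more conventional sketch of the same scope resolution, without reaching into
# the context manager's .gen attribute; `default_name` here is a stand-in value.
import tensorflow as tf

def resolve_scope(scope, default_name='Layer', reuse=None):
    # reuse should be True, tf.AUTO_REUSE, or None in TF 1.x.
    with tf.variable_scope(scope, default_name=default_name, reuse=reuse) as vs:
        return vs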
def __call__(self, values, weights=None, scope=None):
"""Computes the streaming average.
This method builds the fragment of the computational graph that computes the streaming
average, returning a variable representing the actual streaming average value and
an `Op` to update such value.
Arguments:
values: a `Tensor` of arbitrary dimensions.
weights: optional `Tensor` whose rank is either `0`, or the same rank
as values, and must be broadcastable to values (i.e., all dimensions must
be either `1`, or the same as the corresponding values dimension). It contains
the weights for summing up all the elements in `values`.
scope: a `str` or a `tf.VariableScope` used for building the fragment
of the computational graph that computes the streaming average.
Returns:
mean: a `Tensor` representing the current mean, which is a reference
to `self.value`.
update_op: an `Op` that updates the streaming value, which is a reference
to `self.update_op`.
"""
self.compute(values, weights=weights, scope=scope)
return self.value, self.update_op
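# The (value, update_op) contract described above matches TF's built-in
# streaming mean; a minimal sketch with tf.metrics.mean (TF 1.x):
import tensorflow as tf

values = tf.placeholder(tf.float32, shape=[None])
mean_t, update_op = tf.metrics.mean(values)
with tf.Session() as sess:
    sess.run(tf.local_variables_initializer())  # metric accumulators are local vars
    sess.run(update_op, feed_dict={values: [1., 2., 3.]})
    print(sess.run(mean_t))                     # 2.0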
def get_variables_in_scope(scope, collection=tf.GraphKeys.TRAINABLE_VARIABLES):
"""Returns a tuple `tf.Variable`s in a scope for a given collection.
Args:
scope: `tf.VariableScope` instance to retrieve variables from.
collection: Collection to restrict query to. By default this is
`tf.GraphKeys.TRAINABLE_VARIABLES`, which doesn't include non-trainable
variables such as moving averages.
Returns:
A tuple of `tf.Variable` objects.
"""
# Escape the name in case it contains any "." characters. Add a closing slash
# so we will not search any scopes that have this scope name as a prefix.
scope_name = re.escape(scope.name) + "/"
return tuple(tf.get_collection(collection, scope_name))
def __init__(self, input_type=None, output_type=None, name_or_scope=None):
"""Creates the layer.
Args:
input_type: A type.
output_type: A type.
name_or_scope: A string or variable scope. If a string, a new variable
scope will be created by calling
[`create_variable_scope`](#create_variable_scope), with defaults
inherited from the current variable scope. If no caching device is set,
it will be set to `lambda op: op.device`. This is because `tf.while` can
be very inefficient if the variables it uses are not cached locally.
"""
if name_or_scope is None: name_or_scope = type(self).__name__
if isinstance(name_or_scope, tf.VariableScope):
self._vscope = name_or_scope
name = str(self._vscope.name)
elif isinstance(name_or_scope, six.string_types):
self._vscope = create_variable_scope(name_or_scope)
name = name_or_scope
else:
raise TypeError('name_or_scope must be a tf.VariableScope or a string: '
'%s' % (name_or_scope,))
if self._vscope.caching_device is None:
self._vscope.set_caching_device(lambda op: op.device)
super(Layer, self).__init__(input_type, output_type, name)
if not hasattr(self, '_constructor_name'):
self._constructor_name = '__.%s' % self.__class__.__name__
if not hasattr(self, '_constructor_args'):
self._constructor_args = None
if not hasattr(self, '_constructor_kwargs'):
self._constructor_kwargs = None
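# Sketch of the caching behaviour mentioned in the docstring (plain TF 1.x):
# setting a caching_device on a scope lets reads inside tf.while_loop reuse a
# locally cached copy instead of re-fetching the variable on every iteration.
import tensorflow as tf

with tf.variable_scope('cached_layer', caching_device=lambda op: op.device) as vs:
    _ = tf.get_variable('w', shape=[4, 4])
print(vs.caching_device is not None)  # True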
def __init__(self, states, inner_size, trainable=True, scope=None):
"""Initiailzes a new instance of the BahdanauAttention class.
The attention mechanism implemented in this class is the one
described by Bahdanau et al. here: https://arxiv.org/abs/1409.0473.
The attention states and the query are projected to the attention
inner size, then summed together and processed with a tanh and
finally dot-producted with an attention vector. All the operations
are performed at a reference size, the attention size, which
must be set during the initialization phase (with the `inner_size` argument).
Arguments:
states: 3-D Tensor of shape [batch, timesteps, state] representing the
states on which the attention scores will be computed; the third dimension
of the tensor must be statically determined.
inner_size: int representing the inner attention size;
trainable: if True, variables will be trainable;
scope: None, str or tf.VariableScope representing the variable scope
of the layer which will be used to create all the needed variables.
Raises:
ValueError: if the last dimension of the `states` argument is not
statically determined.
"""
super(BahdanauAttention, self).__init__(trainable=trainable, scope=scope)
self._states = states
self._size = inner_size
self._memory = None
self._vector = None
self._var_op_names = set()
# check that the last dimension of the `states`
# variable is fully defined.
state_size = states.get_shape()[-1].value
if state_size is None:
raise ValueError('Last dimension of `states` must be defined, found %s'
% str(states.get_shape()))
self._state_size = state_size
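# A minimal sketch of the scoring described in the docstring (assumed names and
# shapes, not this class's actual implementation): states [B, T, S] and a query
# [B, Q] are projected to the inner size, summed, squashed with tanh, and
# dot-producted with an attention vector.
import tensorflow as tf

def bahdanau_scores(states, query, inner_size):
    memory = tf.layers.dense(states, inner_size, use_bias=False)     # [B, T, A]
    query_proj = tf.layers.dense(query, inner_size, use_bias=False)  # [B, A]
    vector = tf.get_variable('attention_vector', shape=[inner_size])
    activations = tf.tanh(memory + tf.expand_dims(query_proj, 1))    # [B, T, A]
    return tf.reduce_sum(activations * vector, axis=2)               # [B, T]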
def __init__(self, shortlist_size, decoder_out_size, state_size,
trainable=True, scope='PointingSoftmaxOutput'):
"""Initializes a new instance.
Arguments:
shortlist_size: an `int` representing the dimension of the known output vocabulary.
decoder_out_size: an `int` representing the output size of the decoder.
state_size: an `int` representing the size of the attention states.
trainable: if `True`, the created variables will be trainable.
scope: `VariableScope` for the created subgraph.
"""
super(PointingSoftmaxOutput, self).__init__(trainable=trainable, scope=scope)
self._shortlist_size = shortlist_size
self._decoder_out_size = decoder_out_size
self._state_size = state_size
def as_scope(scope):
"""Get the proper variable scope.
Given an object that can represent a `tf.VariableScope`,
namely a `str` or a `tf.VariableScope`, performs type checking
and returns a proper `tf.VariableScope` object. Such a function is
handy when a function accepts an argument serving as a variable
scope but doesn't know its proper type.
Arguments:
scope: a `str` or a `tf.VariableScope` representing a variable scope.
Returns:
a `tf.VariableScope` instance.
Raises:
ValueError: if `scope` is `None`.
TypeError: if `scope` is neither `str` nor `tf.VariableScope`.
Example:
```python
from dket import utils
def do_something(scope):
scope = utils.as_scope(scope or 'DefaultScope')
with tf.variable_scope(scope) as scope:
# do something
pass
"""
if scope is None:
raise ValueError('Cannot create a scope from a None.')
if isinstance(scope, str):
return next(tf.variable_scope(scope).gen) # pylint: disable=I0011,E1101
if isinstance(scope, tf.VariableScope):
return scope
raise TypeError('`scope` argument must be of type `str` or '
'`tf.VariableScope`, while %s was found.'
% str(type(scope)))
def get_variables(prefix=None):
"""Get variables by their name prefix.
Arguments:
prefix: a `str` or a `tf.VariableScope` instance.
Returns:
a list of `tf.Variable` with their name starting with the
given prefix, i.e. all those variables under the scope
specified by the prefix.
"""
prefix = prefix or tf.get_variable_scope().name
return [var for var in tf.global_variables()
if var.name.startswith(prefix)]
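# Usage sketch for get_variables: note that the match is a plain name prefix,
# so "rnn" would also pick up variables under e.g. "rnn_extra".
import tensorflow as tf

with tf.variable_scope('rnn'):
    _ = tf.get_variable('w', shape=[3, 3])
print([v.name for v in get_variables('rnn')])  # ['rnn/w:0']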
def __init__(self, subnet, name=None, scope=None):
"""Create the Shared operator.
Use this as:
f = Shared(Cr(100, 3))
g = f | f | f
Ordinarily, you do not need to provide either a name or a scope.
Providing a name is useful if you want a well-defined namespace
for the variables (e.g., for saving a subnet).
Args:
subnet: Definition of the shared network.
name: Optional name for the shared context.
scope: Optional shared scope (must be a Scope, not a string).
Raises:
ValueError: Scope is not of type tf.VariableScope, name is not
of type string, or both scope and name are given together.
"""
if scope is not None and not isinstance(scope, tf.VariableScope):
raise ValueError("scope must be None or a VariableScope")
if name is not None and not isinstance(name, str):
raise ValueError("name must be None or a string")
if scope is not None and name is not None:
raise ValueError("cannot provide both a name and a scope")
if name is None:
name = "Shared_%d" % Shared.shared_number
Shared.shared_number += 1
self.subnet = subnet
self.name = name
self.scope = scope
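# Sketch of the sharing mechanism behind Shared in plain TF 1.x (not the specs
# DSL itself): entering the same variable scope with reuse=True makes the second
# call use the weights created by the first.
import tensorflow as tf

x1 = tf.placeholder(tf.float32, shape=[None, 16])
x2 = tf.placeholder(tf.float32, shape=[None, 16])
with tf.variable_scope('Shared_0') as vs:
    y1 = tf.layers.dense(x1, 100, name='fc')
with tf.variable_scope(vs, reuse=True):
    y2 = tf.layers.dense(x2, 100, name='fc')  # same kernel/bias as y1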
def build_graph(self, graph):
np.random.seed(self.random_seed)
with graph.as_default():
tf.set_random_seed(self.random_seed)
# Dims: bs x num_steps x state_size
self.inputs = tf.placeholder(tf.float32, shape=[None, None, self.policy_params['nb_inputs']], name='inputs')
input_shape = tf.shape(self.inputs)
dynamic_batch_size, dynamic_num_steps = input_shape[0], input_shape[1]
policy_scope = tf.VariableScope(reuse=False, name='Policy')
with tf.variable_scope(policy_scope):
policy_inputs = tf.reshape(self.inputs, [-1, self.policy_params['nb_inputs']])
probs, actions = capacities.policy(self.policy_params, policy_inputs)
self.probs = tf.reshape(probs, [dynamic_batch_size, dynamic_num_steps, self.policy_params['nb_outputs']])
self.actions = tf.reshape(actions, [dynamic_batch_size, dynamic_num_steps, 1])
self.action_t = self.actions[0, 0, 0]
with tf.variable_scope('Training'):
self.rewards = tf.placeholder(tf.float32, shape=[None, None, 1], name="reward")
self.mask_plh = tf.placeholder(tf.float32, shape=[None, None, 1], name="mask_plh")
baseline = tf.reduce_mean(self.rewards)
batch_size, num_steps = tf.shape(self.actions)[0], tf.shape(self.actions)[1]
line_indices = tf.matmul( # Line indices
tf.reshape(tf.range(0, batch_size), [-1, 1])
, tf.ones([1, num_steps], dtype=tf.int32)
)
column_indices = tf.matmul( # Column indices
tf.ones([batch_size, 1], dtype=tf.int32)
, tf.reshape(tf.range(0, num_steps), [1, -1])
)
depth_indices = tf.cast(tf.squeeze(self.actions, 2), tf.int32)
stacked_actions = tf.stack(
[line_indices, column_indices, depth_indices], 2
)
log_probs = tf.expand_dims(tf.log(tf.gather_nd(self.probs, stacked_actions)), 2)
# We want to average on sequence
self.loss = tf.reduce_mean( - tf.reduce_sum((log_probs * (self.rewards - baseline)) * self.mask_plh, 1))
adam = tf.train.AdamOptimizer(self.lr)
self.global_step = tf.Variable(0, trainable=False, name="global_step", collections=[tf.GraphKeys.GLOBAL_STEP, tf.GraphKeys.GLOBAL_VARIABLES])
self.train_op = adam.minimize(self.loss, global_step=self.global_step)
self.score_plh = tf.placeholder(tf.float32, shape=[])
self.score_sum_t = tf.summary.scalar('av_score', self.score_plh)
self.loss_plh = tf.placeholder(tf.float32, shape=[])
self.loss_sum_t = tf.summary.scalar('loss', self.loss_plh)
self.all_summary_t = tf.summary.merge_all()
self.episode_id, self.inc_ep_id_op = capacities.counter("episode_id")
# Playing part
self.pscore_plh = tf.placeholder(tf.float32, shape=[])
self.pscore_sum_t = tf.summary.scalar('play_score', self.pscore_plh)
return graph
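# Sketch of what the matmul-based index construction above computes, written
# with tf.tile for clarity: gather, for every (batch, step) position, the
# probability of the action actually taken. Assumed shapes: probs [B, T, A],
# actions [B, T, 1].
import tensorflow as tf

probs = tf.placeholder(tf.float32, shape=[None, None, 4])
actions = tf.placeholder(tf.int32, shape=[None, None, 1])
b, t = tf.shape(probs)[0], tf.shape(probs)[1]
line = tf.tile(tf.reshape(tf.range(b), [-1, 1]), [1, t])     # [B, T] row index
col = tf.tile(tf.reshape(tf.range(t), [1, -1]), [b, 1])      # [B, T] step index
idx = tf.stack([line, col, tf.squeeze(actions, 2)], axis=2)  # [B, T, 3]
picked_probs = tf.gather_nd(probs, idx)                      # [B, T]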
def build_graph(self, graph):
with graph.as_default():
tf.set_random_seed(self.random_seed)
self.inputs_plh = tf.placeholder(tf.int32, shape=[None], name="inputs_plh")
q_scope = tf.VariableScope(reuse=False, name='QValues')
with tf.variable_scope(q_scope):
self.Qs = tf.get_variable('Qs'
, shape=[self.nb_state, self.action_space.n]
, initializer=tf.constant_initializer(self.initial_q_value)
, dtype=tf.float32
)
tf.summary.histogram('Qarray', self.Qs)
self.q_preds_t = tf.gather(self.Qs, self.inputs_plh)
policy_scope = tf.VariableScope(reuse=False, name='Policy')
with tf.variable_scope(policy_scope):
self.actions_t, self.probs_t = capacities.tabular_eps_greedy(
self.inputs_plh, self.q_preds_t, self.nb_state, self.env.action_space.n, self.N0, self.min_eps
)
self.action_t = self.actions_t[0]
self.q_value_t = self.q_preds_t[0][self.action_t]
learning_scope = tf.VariableScope(reuse=False, name='TDLearning')
with tf.variable_scope(learning_scope):
self.rewards_plh = tf.placeholder(tf.float32, shape=[None], name="rewards_plh")
self.targets_plh = tf.placeholder(tf.float32, shape=[None], name="targets_plh")
self.loss, self.train_op = capacities.tabular_learning_with_lr(
self.lr, self.lr_decay_steps, self.Qs, self.inputs_plh, self.actions_t, self.targets_plh
)
self.score_plh = tf.placeholder(tf.float32, shape=[])
self.score_sum_t = tf.summary.scalar('score', self.score_plh)
self.loss_plh = tf.placeholder(tf.float32, shape=[])
self.loss_sum_t = tf.summary.scalar('loss', self.loss_plh)
self.all_summary_t = tf.summary.merge_all()
self.episode_id, self.inc_ep_id_op = capacities.counter("episode_id")
# Playing part
self.pscore_plh = tf.placeholder(tf.float32, shape=[])
self.pscore_sum_t = tf.summary.scalar('play_score', self.pscore_plh)
return graph
def build_graph(self, graph):
with graph.as_default():
tf.set_random_seed(self.random_seed)
self.inputs_plh = tf.placeholder(tf.int32, shape=[None], name="inputs_plh")
q_scope = tf.VariableScope(reuse=False, name='QValues')
with tf.variable_scope(q_scope):
self.Qs = tf.get_variable('Qs'
, shape=[self.nb_state, self.action_space.n]
, initializer=tf.constant_initializer(self.initial_q_value)
, dtype=tf.float32
)
tf.summary.histogram('Qarray', self.Qs)
self.q_preds_t = tf.gather(self.Qs, self.inputs_plh)
fixed_q_scope = tf.VariableScope(reuse=False, name='FixedQValues')
with tf.variable_scope(fixed_q_scope):
self.update_fixed_vars_op = capacities.fix_scope(q_scope)
policy_scope = tf.VariableScope(reuse=False, name='Policy')
with tf.variable_scope(policy_scope):
if 'UCB' in self.config and self.config['UCB']:
self.actions_t, self.probs_t = capacities.tabular_UCB(
self.Qs, self.inputs_plh
)
else:
self.actions_t, self.probs_t = capacities.tabular_eps_greedy(
self.inputs_plh, self.q_preds_t, self.nb_state, self.env.action_space.n, self.N0, self.min_eps
)
self.action_t = self.actions_t[0]
self.q_value_t = self.q_preds_t[0][self.action_t]
# Experienced replay part
with tf.variable_scope('Learning'):
with tf.variable_scope(fixed_q_scope, reuse=True):
fixed_Qs = tf.get_variable('Qs')
self.rewards_plh = tf.placeholder(tf.float32, shape=[None], name="rewards_plh")
self.next_states_plh = tf.placeholder(tf.int32, shape=[None], name="next_states_plh")
# Note that we use the fixed Qs to create the targets
self.targets_t = capacities.get_q_learning_target(fixed_Qs, self.rewards_plh, self.next_states_plh, self.discount)
self.loss, self.train_op = capacities.tabular_learning_with_lr(
self.lr, self.lr_decay_steps, self.Qs, self.inputs_plh, self.actions_t, self.targets_t
)
self.score_plh = tf.placeholder(tf.float32, shape=[])
self.score_sum_t = tf.summary.scalar('score', self.score_plh)
self.loss_plh = tf.placeholder(tf.float32, shape=[])
self.loss_sum_t = tf.summary.scalar('loss', self.loss_plh)
self.all_summary_t = tf.summary.merge_all()
self.episode_id, self.inc_ep_id_op = capacities.counter("episode_id")
self.event_count, self.inc_event_count_op = capacities.counter("event_count")
# Playing part
self.pscore_plh = tf.placeholder(tf.float32, shape=[])
self.pscore_sum_t = tf.summary.scalar('play_score', self.pscore_plh)
return graph
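# Hypothetical sketch of what a fix_scope-style helper does, based on how it is
# used above: snapshot the live Q variables into a frozen copy so that learning
# targets are computed from a fixed estimate. The helper name and signature here
# are assumptions, not the actual capacities API.
import re
import tensorflow as tf

def make_update_fixed_op(live_scope_name, fixed_scope_name):
    live_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                                  re.escape(live_scope_name) + '/')
    assigns = []
    for var in live_vars:
        suffix = var.op.name[len(live_scope_name) + 1:]
        with tf.variable_scope(fixed_scope_name):
            fixed = tf.get_variable(suffix, shape=var.get_shape(),
                                    dtype=var.dtype.base_dtype, trainable=False)
        assigns.append(tf.assign(fixed, var))
    return tf.group(*assigns)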
def build_graph(self, graph):
with graph.as_default():
tf.set_random_seed(self.random_seed)
self.inputs_plh = tf.placeholder(tf.int32, shape=[None], name="inputs_plh")
q_scope = tf.VariableScope(reuse=False, name='QValues')
with tf.variable_scope(q_scope):
self.Qs = tf.get_variable('Qs'
, shape=[self.nb_state, self.action_space.n]
, initializer=tf.constant_initializer(self.initial_q_value)
, dtype=tf.float32
)
tf.summary.histogram('Qarray', self.Qs)
self.q_preds_t = tf.gather(self.Qs, self.inputs_plh)
policy_scope = tf.VariableScope(reuse=False, name='Policy')
with tf.variable_scope(policy_scope):
if 'UCB' in self.config and self.config['UCB']:
self.actions_t, self.probs_t = capacities.tabular_UCB(
self.Qs, self.inputs_plh
)
else:
self.actions_t, self.probs_t = capacities.tabular_eps_greedy(
self.inputs_plh, self.q_preds_t, self.nb_state, self.env.action_space.n, self.N0, self.min_eps
)
self.action_t = self.actions_t[0]
self.q_value_t = self.q_preds_t[0][self.action_t]
learning_scope = tf.VariableScope(reuse=False, name='Learning')
with tf.variable_scope(learning_scope):
self.rewards_plh = tf.placeholder(tf.float32, shape=[None], name="rewards_plh")
self.next_states_plh = tf.placeholder(tf.int32, shape=[None], name="next_states_plh")
self.next_probs_plh = tf.placeholder(tf.float32, shape=[None, self.action_space.n], name="next_probs_plh")
self.targets_t = capacities.get_expected_sarsa_target(self.Qs, self.rewards_plh, self.next_states_plh, self.next_probs_plh, self.discount)
self.loss, self.train_op = capacities.tabular_learning_with_lr(
self.lr, self.lr_decay_steps, self.Qs, self.inputs_plh, self.actions_t, self.targets_t
)
self.score_plh = tf.placeholder(tf.float32, shape=[])
self.score_sum_t = tf.summary.scalar('score', self.score_plh)
self.loss_plh = tf.placeholder(tf.float32, shape=[])
self.loss_sum_t = tf.summary.scalar('loss', self.loss_plh)
self.all_summary_t = tf.summary.merge_all()
self.episode_id, self.inc_ep_id_op = capacities.counter("episode_id")
# Playing part
self.pscore_plh = tf.placeholder(tf.float32, shape=[])
self.pscore_sum_t = tf.summary.scalar('play_score', self.pscore_plh)
return graph
def build_graph(self, graph):
with graph.as_default():
tf.set_random_seed(self.random_seed)
self.N0_t = tf.constant(self.N0, tf.float32, name='N_0')
self.N = tf.Variable(0., dtype=tf.float32, name='N', trainable=False)
self.min_eps_t = tf.constant(self.min_eps, tf.float32, name='min_eps')
self.inputs = tf.placeholder(tf.float32, shape=[None, self.q_params['nb_inputs']], name='inputs')
q_scope = tf.VariableScope(reuse=False, name='QValues')
with tf.variable_scope(q_scope):
self.q_values = tf.squeeze(capacities.value_f(self.q_params, self.inputs))
self.action_t = capacities.eps_greedy(
self.inputs, self.q_values, self.env.action_space.n, self.N0, self.min_eps
)
self.q_t = self.q_values[self.action_t]
with tf.variable_scope('Training'):
self.reward = tf.placeholder(tf.float32, shape=[], name="reward")
self.next_state = tf.placeholder(tf.float32, shape=[1, self.q_params['nb_inputs']], name="nextState")
self.next_action = tf.placeholder(tf.int32, shape=[], name="nextAction")
with tf.variable_scope(q_scope, reuse=True):
next_q_values = tf.squeeze(capacities.value_f(self.q_params, self.next_state))
target_q1 = tf.stop_gradient(self.reward + self.discount * next_q_values[self.next_action])
target_q2 = self.reward
is_done = tf.cast(self.next_state[0, 4], tf.bool)
target_q = tf.where(is_done, target_q2, target_q1)
with tf.control_dependencies([target_q]):
self.loss = 1/2 * tf.square(target_q - self.q_t)
adam = tf.train.AdamOptimizer(self.lr)
self.global_step = tf.Variable(0, trainable=False, name="global_step", collections=[tf.GraphKeys.GLOBAL_STEP, tf.GraphKeys.GLOBAL_VARIABLES])
self.train_op = adam.minimize(self.loss, global_step=self.global_step)
self.score_plh = tf.placeholder(tf.float32, shape=[])
self.score_sum_t = tf.summary.scalar('score', self.score_plh)
self.loss_plh = tf.placeholder(tf.float32, shape=[])
self.loss_sum_t = tf.summary.scalar('loss', self.loss_plh)
self.all_summary_t = tf.summary.merge_all()
self.episode_id, self.inc_ep_id_op = capacities.counter("episode_id")
# Playing part
self.pscore_plh = tf.placeholder(tf.float32, shape=[])
self.pscore_sum_t = tf.summary.scalar('play_score', self.pscore_plh)
return graph
def build_graph(self, graph):
with graph.as_default():
tf.set_random_seed(self.random_seed)
self.inputs_plh = tf.placeholder(tf.int32, shape=[None], name="inputs_plh")
q_scope = tf.VariableScope(reuse=False, name='QValues')
with tf.variable_scope(q_scope):
self.Qs = tf.get_variable('Qs'
, shape=[self.nb_state, self.action_space.n]
, initializer=tf.constant_initializer(self.initial_q_value)
, dtype=tf.float32
)
tf.summary.histogram('Qarray', self.Qs)
self.q_preds_t = tf.gather(self.Qs, self.inputs_plh)
policy_scope = tf.VariableScope(reuse=False, name='Policy')
with tf.variable_scope(policy_scope):
if 'UCB' in self.config and self.config['UCB']:
self.actions_t, self.probs_t = capacities.tabular_UCB(
self.Qs, self.inputs_plh
)
else:
self.actions_t, self.probs_t = capacities.tabular_eps_greedy(
self.inputs_plh, self.q_preds_t, self.nb_state, self.env.action_space.n, self.N0, self.min_eps
)
self.action_t = self.actions_t[0]
self.q_value_t = self.q_preds_t[0][self.action_t]
learning_scope = tf.VariableScope(reuse=False, name='Learning')
with tf.variable_scope(learning_scope):
self.rewards_plh = tf.placeholder(tf.float32, shape=[None], name="rewards_plh")
self.next_states_plh = tf.placeholder(tf.int32, shape=[None], name="next_states_plh")
self.targets_t = capacities.get_q_learning_target(self.Qs, self.rewards_plh, self.next_states_plh, self.discount)
self.loss, self.train_op = capacities.tabular_learning_with_lr(
self.lr, self.lr_decay_steps, self.Qs, self.inputs_plh, self.actions_t, self.targets_t
)
self.score_plh = tf.placeholder(tf.float32, shape=[])
self.score_sum_t = tf.summary.scalar('score', self.score_plh)
self.loss_plh = tf.placeholder(tf.float32, shape=[])
self.loss_sum_t = tf.summary.scalar('loss', self.loss_plh)
self.all_summary_t = tf.summary.merge_all()
self.episode_id, self.inc_ep_id_op = capacities.counter("episode_id")
# Playing part
self.pscore_plh = tf.placeholder(tf.float32, shape=[])
self.pscore_sum_t = tf.summary.scalar('play_score', self.pscore_plh)
return graph
def build_graph(self, graph):
with graph.as_default():
tf.set_random_seed(self.random_seed)
self.inputs_plh = tf.placeholder(tf.int32, shape=[None], name="inputs_plh")
q_scope = tf.VariableScope(reuse=False, name='QValues')
with tf.variable_scope(q_scope):
self.Qs = tf.get_variable('Qs'
, shape=[self.nb_state, self.action_space.n]
, initializer=tf.constant_initializer(self.initial_q_value)
, dtype=tf.float32
)
tf.summary.histogram('Qarray', self.Qs)
self.q_preds_t = tf.gather(self.Qs, self.inputs_plh)
policy_scope = tf.VariableScope(reuse=False, name='Policy')
with tf.variable_scope(policy_scope):
if 'UCB' in self.config and self.config['UCB']:
self.actions_t, self.probs_t = capacities.tabular_UCB(
self.Qs, self.inputs_plh
)
else:
self.actions_t, self.probs_t = capacities.tabular_eps_greedy(
self.inputs_plh, self.q_preds_t, self.nb_state, self.env.action_space.n, self.N0, self.min_eps
)
self.action_t = self.actions_t[0]
self.q_value_t = self.q_preds_t[0][self.action_t]
learning_scope = tf.VariableScope(reuse=False, name='Learning')
with tf.variable_scope(learning_scope):
self.rewards_plh = tf.placeholder(tf.float32, shape=[None], name="rewards_plh")
self.targets_t = capacities.get_mc_target(self.rewards_plh, self.discount)
self.loss, self.train_op = capacities.tabular_learning(
self.Qs, self.inputs_plh, self.actions_t, self.targets_t
)
self.score_plh = tf.placeholder(tf.float32, shape=[])
self.score_sum_t = tf.summary.scalar('score', self.score_plh)
self.loss_plh = tf.placeholder(tf.float32, shape=[])
self.loss_sum_t = tf.summary.scalar('loss', self.loss_plh)
self.all_summary_t = tf.summary.merge_all()
self.episode_id, self.inc_ep_id_op = capacities.counter("episode_id")
# Playing part
self.pscore_plh = tf.placeholder(tf.float32, shape=[])
self.pscore_sum_t = tf.summary.scalar('play_score', self.pscore_plh)
return graph
def build_graph(self, graph):
with graph.as_default():
tf.set_random_seed(self.random_seed)
self.inputs_plh = tf.placeholder(tf.int32, shape=[None], name="inputs_plh")
q_scope = tf.VariableScope(reuse=False, name='QValues')
with tf.variable_scope(q_scope):
self.Qs = tf.get_variable('Qs'
, shape=[self.nb_state, self.action_space.n]
, initializer=tf.constant_initializer(self.initial_q_value)
, dtype=tf.float32
)
tf.summary.histogram('Qarray', self.Qs)
self.q_preds_t = tf.gather(self.Qs, self.inputs_plh)
policy_scope = tf.VariableScope(reuse=False, name='Policy')
with tf.variable_scope(policy_scope):
if 'UCB' in self.config and self.config['UCB']:
self.actions_t, self.probs_t = capacities.tabular_UCB(
self.Qs, self.inputs_plh
)
else:
self.actions_t, self.probs_t = capacities.tabular_eps_greedy(
self.inputs_plh, self.q_preds_t, self.nb_state, self.env.action_space.n, self.N0, self.min_eps
)
self.action_t = self.actions_t[0]
self.q_value_t = self.q_preds_t[0, self.action_t]
learning_scope = tf.VariableScope(reuse=False, name='Learning')
with tf.variable_scope(learning_scope):
self.targets_t = tf.placeholder(tf.float32, shape=[None], name="targets_t")
self.loss, self.train_op = capacities.tabular_learning_with_lr(
self.lr, self.lr_decay_steps, self.Qs, self.inputs_plh, self.actions_t, self.targets_t
)
self.score_plh = tf.placeholder(tf.float32, shape=[])
self.score_sum_t = tf.summary.scalar('score', self.score_plh)
self.loss_plh = tf.placeholder(tf.float32, shape=[])
self.loss_sum_t = tf.summary.scalar('loss', self.loss_plh)
self.all_summary_t = tf.summary.merge_all()
self.episode_id, self.inc_ep_id_op = capacities.counter("episode_id")
# Playing part
self.pscore_plh = tf.placeholder(tf.float32, shape=[])
self.pscore_sum_t = tf.summary.scalar('play_score', self.pscore_plh)
return graph
def build_graph(self, graph):
with graph.as_default():
tf.set_random_seed(self.random_seed)
self.inputs_plh = tf.placeholder(tf.int32, shape=[None], name="inputs_plh")
q_scope = tf.VariableScope(reuse=False, name='QValues')
with tf.variable_scope(q_scope):
self.Qs = tf.get_variable('Qs'
, shape=[self.nb_state, self.action_space.n]
, initializer=tf.constant_initializer(self.initial_q_value)
, dtype=tf.float32
)
tf.summary.histogram('Qarray', self.Qs)
self.q_preds_t = tf.gather(self.Qs, self.inputs_plh)
policy_scope = tf.VariableScope(reuse=False, name='Policy')
with tf.variable_scope(policy_scope):
if 'UCB' in self.config and self.config['UCB']:
self.actions_t, self.probs_t = capacities.tabular_UCB(
self.Qs, self.inputs_plh
)
else:
self.actions_t, self.probs_t = capacities.tabular_eps_greedy(
self.inputs_plh, self.q_preds_t, self.nb_state, self.env.action_space.n, self.N0, self.min_eps
)
self.action_t = self.actions_t[0]
self.q_value_t = self.q_preds_t[0][self.action_t]
self.episode_id, self.inc_ep_id_op = capacities.counter("episode_id")
learning_scope = tf.VariableScope(reuse=False, name='Learning')
with tf.variable_scope(learning_scope):
self.rewards_plh = tf.placeholder(tf.float32, shape=[None], name="rewards_plh")
self.next_states_plh = tf.placeholder(tf.int32, shape=[None], name="next_states_plh")
self.next_actions_plh = tf.placeholder(tf.int32, shape=[None], name="next_actions_plh")
self.next_probs_plh = tf.placeholder(tf.float32, shape=[None, self.action_space.n], name="next_probs_plh")
sigma = tf.train.inverse_time_decay(tf.constant(1., dtype=tf.float32), self.episode_id, decay_steps=100, decay_rate=0.1)
tf.summary.scalar('sigma', sigma)
self.targets_t = capacities.get_sigma_target(self.Qs, sigma, self.rewards_plh, self.next_states_plh, self.next_actions_plh, self.next_probs_plh, self.discount)
self.loss, self.train_op = capacities.tabular_learning_with_lr(
self.lr, self.lr_decay_steps, self.Qs, self.inputs_plh, self.actions_t, self.targets_t
)
self.score_plh = tf.placeholder(tf.float32, shape=[])
self.score_sum_t = tf.summary.scalar('score', self.score_plh)
self.loss_plh = tf.placeholder(tf.float32, shape=[])
self.loss_sum_t = tf.summary.scalar('loss', self.loss_plh)
self.all_summary_t = tf.summary.merge_all()
# Playing part
self.pscore_plh = tf.placeholder(tf.float32, shape=[])
self.pscore_sum_t = tf.summary.scalar('play_score', self.pscore_plh)
return graph