def _create_train_ops(self, dependencies: List[List[tf.Operation]], optimizer_config: Optional[dict]) -> None:
"""
Create the train ops for training. In order to handle incomplete batches, there must be one train op for
each number of empty towers. E.g. for 2 GPU training, one must define 2 train ops for 1 and 2 towers
respectively. The train ops must be named ``train_op_1``, ``train_op_2`` etc.
wherein the suffixed number stands for the number of towers.
By default the train ops are constructed in the following way:
- optimizer is created from the ``model.optimizer`` configuration dict
- REGULARIZATION_LOSSSES collection is summed to ``regularization_loss``
- gradients minimizing the respective tower losses and ``regularization_loss`` are computed
- for each number of non-empty towers
- gradients of the respective towers are averaged and applied
To implement a custom behavior, override this method and create your own op named as :py:attr:`TRAIN_OP_NAME`.

    .. code-block:: yaml
        :caption: example optimizer config

        model:
          optimizer:
            class: RMSPropOptimizer
            learning_rate: 0.001
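
    A minimal override might look as follows. This is only a sketch, assuming a single tower: it ignores
    the ``dependencies`` argument and picks ``GradientDescentOptimizer`` arbitrarily instead of reading
    ``optimizer_config``.

    .. code-block:: python
        :caption: sketch of a custom override (illustrative only)

        def _create_train_ops(self, dependencies, optimizer_config):
            # a single op named ``train_op_1`` suffices for single-tower training
            loss = tf.reduce_mean(self._towers[0].loss)
            optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.1)
            optimizer.minimize(loss, name=BaseModel.TRAIN_OP_NAME + '_1')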

    :param dependencies: a list of dependent operations (e.g. batch normalization updates), one list for
        each number of non-empty towers
    :param optimizer_config: optimizer configuration dict
    """
    if optimizer_config is None:
        raise ValueError('Optimizer config was not specified although it is required for creating the train op. '
                         'Please specify the configuration in `model.optimizer`.')
    grads_and_vars = []
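    # build the optimizer from the `model.optimizer` configuration dict (see the yaml example above)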
    optimizer = create_optimizer(optimizer_config)
    regularization_losses = self.graph.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
    # sum the collected regularization losses (an empty collection sums to 0.0)
    regularization_loss = tf.reduce_sum(tf.stack(regularization_losses))
    if regularization_losses:
        logging.info('\tAdding regularization losses')
        logging.debug('\tRegularization losses: %s', [loss.name for loss in regularization_losses])
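    # under each tower's scope, compute gradients of the mean tower loss plus the regularization loss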
    for tower in self._towers:
        with tower:
            grads_and_vars.append(optimizer.compute_gradients(tf.reduce_mean(tower.loss) + regularization_loss))
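    # one train op per number of non-empty towers: `train_op_k` averages and applies the gradients
    # of the first k towers, after the corresponding update dependencies (e.g. batch norm updates)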
    for i in range(len(self._towers)):
        with tf.control_dependencies(dependencies[i]):
            optimizer.apply_gradients(average_gradients(grads_and_vars[:(i + 1)]),
                                      name=BaseModel.TRAIN_OP_NAME + '_{}'.format(i + 1))
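

# ``average_gradients`` is imported from the library elsewhere in this module. A minimal sketch of the
# behavior assumed above (element-wise averaging of each variable's gradient across the towers) might
# look like the function below; the name and details are illustrative, not the library implementation.
def average_gradients_sketch(tower_grads_and_vars):
    """Average per-variable gradients over towers.

    :param tower_grads_and_vars: one ``[(grad, var), ...]`` list per tower, all in the same variable order
    :return: a single ``[(avg_grad, var), ...]`` list suitable for ``optimizer.apply_gradients``
    """
    averaged = []
    for per_tower_pairs in zip(*tower_grads_and_vars):  # group the (grad, var) pairs by variable
        grads = [grad for grad, _ in per_tower_pairs if grad is not None]
        variable = per_tower_pairs[0][1]
        averaged.append((tf.reduce_mean(tf.stack(grads), axis=0), variable))
    return averaged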