def _create_train_ops(self, dependencies: List[List[tf.Operation]], optimizer_config: Optional[dict]) -> None:
"""
Create the train ops for training. In order to handle incomplete batches, there must be one train op for
each number of empty towers. E.g. for 2 GPU training, one must define 2 train ops for 1 and 2 towers
respectively. The train ops must be named ``train_op_1``, ``train_op_2`` etc.
wherein the suffixed number stands for the number of towers.
By default the train ops are constructed in the following way:
- optimizer is created from the ``model.optimizer`` configuration dict
- REGULARIZATION_LOSSSES collection is summed to ``regularization_loss``
- gradients minimizing the respective tower losses and ``regularization_loss`` are computed
- for each number of non-empty towers
- gradients of the respective towers are averaged and applied
To implement a custom behavior, override this method and create your own op named as :py:attr:`TRAIN_OP_NAME`.

    .. code-block:: yaml
        :caption: example optimizer config

        model:
          optimizer:
            class: RMSPropOptimizer
            learning_rate: 0.001
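
    A minimal override might look as follows. This is only a sketch, assuming a single tower: it ignores
    the ``dependencies`` argument and picks ``GradientDescentOptimizer`` arbitrarily instead of reading
    ``optimizer_config``.

    .. code-block:: python
        :caption: sketch of a custom override (illustrative only)

        def _create_train_ops(self, dependencies, optimizer_config):
            # a single op named ``train_op_1`` suffices for single-tower training
            loss = tf.reduce_mean(self._towers[0].loss)
            optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.1)
            optimizer.minimize(loss, name=BaseModel.TRAIN_OP_NAME + '_1')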

    :param dependencies: a list of dependent operations (e.g. batch normalization updates), one list for
        each number of non-empty towers
    :param optimizer_config: optimizer configuration dict
    """
    if optimizer_config is None:
        raise ValueError('Optimizer config was not specified although it is required for creating the train op. '
                         'Please specify the configuration in `model.optimizer`.')
    grads_and_vars = []
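    # build the optimizer from the `model.optimizer` configuration dict (see the yaml example above)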
    optimizer = create_optimizer(optimizer_config)
    regularization_losses = self.graph.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
    # sum the collected regularization losses (an empty collection sums to 0.0)
    regularization_loss = tf.reduce_sum(tf.stack(regularization_losses))
    if regularization_losses:
        logging.info('\tAdding regularization losses')
        logging.debug('\tRegularization losses: %s', [loss.name for loss in regularization_losses])
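    # under each tower's scope, compute gradients of the mean tower loss plus the regularization loss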
    for tower in self._towers:
        with tower:
            grads_and_vars.append(optimizer.compute_gradients(tf.reduce_mean(tower.loss) + regularization_loss))
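    # one train op per number of non-empty towers: `train_op_k` averages and applies the gradients
    # of the first k towers, after the corresponding update dependencies (e.g. batch norm updates)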
    for i in range(len(self._towers)):
        with tf.control_dependencies(dependencies[i]):
            optimizer.apply_gradients(average_gradients(grads_and_vars[:(i + 1)]),
                                      name=BaseModel.TRAIN_OP_NAME + '_{}'.format(i + 1))
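

# ``average_gradients`` is imported from the library elsewhere in this module. A minimal sketch of the
# behavior assumed above (element-wise averaging of each variable's gradient across the towers) might
# look like the function below; the name and details are illustrative, not the library implementation.
def average_gradients_sketch(tower_grads_and_vars):
    """Average per-variable gradients over towers.

    :param tower_grads_and_vars: one ``[(grad, var), ...]`` list per tower, all in the same variable order
    :return: a single ``[(avg_grad, var), ...]`` list suitable for ``optimizer.apply_gradients``
    """
    averaged = []
    for per_tower_pairs in zip(*tower_grads_and_vars):  # group the (grad, var) pairs by variable
        grads = [grad for grad, _ in per_tower_pairs if grad is not None]
        variable = per_tower_pairs[0][1]
        averaged.append((tf.reduce_mean(tf.stack(grads), axis=0), variable))
    return averaged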