def reduce_multigpu(tensor_list, dst, op=reduce_op.SUM, group=group.WORLD):
"""Reduces the tensor data on multiple GPUs across all machines. Each tensor
in tensor_list should reside on a separate GPU
Only the GPU of tensor_list[0] on the process with rank ``dst`` is
going to receive the final result.
Only nccl backend is currently supported
tensors should only be GPU tensors
Arguments:
tensor_list (List[Tensor]): Input and output GPU tensors of the
collective . The function operates in-place.
dst (int): Destination rank
op (optional): One of the values from ``torch.distributed.reduce_op``
enum. Specifies an operation used for element-wise reductions.
group (optional): Group of the collective.
"""
    assert torch.distributed._initialized == _INITIALIZED_PG, \
        "collective only supported in process-group mode"
warnings.warn("""
================================================================================
WARNING
================================================================================
reduce__multigpu is still experimental. The API will change without
notice and we're can't guarantee full correctness and expected performance yet.
We'll announce it once it's ready.
""")
    return torch._C._dist_reduce_multigpu(tensor_list, dst, op, group)
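

# Hedged usage sketch (illustrative addition, not part of the original module).
# It shows how reduce_multigpu might be driven from a multi-process job in which
# each process owns all of its local GPUs. The helper name, the init_method
# address, and the rank/world_size handling below are assumptions made purely
# for illustration; adapt them to your own launcher.
def _example_reduce_multigpu(rank, world_size):
    import torch
    import torch.distributed as dist

    # NCCL is the only backend this collective supports; the rendezvous
    # address is a placeholder assumption.
    dist.init_process_group(backend="nccl",
                            init_method="tcp://127.0.0.1:23456",
                            world_size=world_size, rank=rank)

    # One tensor per local GPU; every process passes all of its GPU tensors.
    tensor_list = [torch.ones(10).cuda(i)
                   for i in range(torch.cuda.device_count())]

    # After the call, tensor_list[0] on the process with rank 0 holds the
    # element-wise sum across every GPU of every process.
    dist.reduce_multigpu(tensor_list, dst=0, op=dist.reduce_op.SUM)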