from mpi4py import MPI

# Note: get_worker_ranks and get_num_gpus are helpers assumed to be defined
# elsewhere in this module.


def get_device(comm, num_masters=1, gpu_limit=-1, gpu_for_master=False):
    """Arguments:
        comm: MPI intracommunicator containing all processes
        num_masters: number of processes that will be assigned as masters
        gpu_limit: maximum number of GPUs to use on one host
        gpu_for_master: whether master processes should be given a GPU
    Returns a device name ('cpu' or 'gpuN') appropriate for use with theano."""
    rank = comm.Get_rank()
    # Determine which ranks are eligible to receive a GPU
    if gpu_for_master:
        gpu_ranks = range(comm.Get_size())
    else:
        gpu_ranks = get_worker_ranks(comm, num_masters)
    # Get the ranks of the other processes that share the same host
    # and determine which GPU to take on the host
    host = MPI.Get_processor_name()
    hosts = comm.allgather(host)
    workers_sharing_host = [i for i in gpu_ranks
                            if hosts[i] == host]
    if rank in workers_sharing_host:
        worker_id = workers_sharing_host.index(rank)
    else:
        worker_id = -1
    # get_num_gpus will fail if CUDA is not installed,
    # so we short-circuit if 0 GPUs are requested
    if gpu_limit == 0:
        return 'cpu'
    max_gpu = get_num_gpus() - 1
    if gpu_limit > 0:
        max_gpu = min(max_gpu, gpu_limit - 1)
    if worker_id < 0:  # or worker_id > max_gpu:
        return 'cpu'
    else:
        # Assign GPUs round-robin among the eligible processes on this host
        return 'gpu%d' % (worker_id % (max_gpu + 1))
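

# Usage sketch (not part of the original module): one plausible way to apply
# the returned device string, by setting THEANO_FLAGS before theano is
# imported. The choice of MPI.COMM_WORLD and the gpu_limit value are
# assumptions for illustration only.
if __name__ == '__main__':
    import os

    device = get_device(MPI.COMM_WORLD, num_masters=1, gpu_limit=2)
    # THEANO_FLAGS must be set before the first import of theano
    os.environ['THEANO_FLAGS'] = 'device=%s,floatX=float32' % device
    import theano  # noqa: F401
    print('Rank %d using device %s' % (MPI.COMM_WORLD.Get_rank(), device))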