def run_benchmark_distributed():
    """Launch two local worker processes, run the benchmark against them
    over gRPC, then shut the workers down.

    Returns:
        The rate reported by run_benchmark for the distributed graph.
    """
    ops = create_graph("/job:worker/task:0", "/job:worker/task:1")
    queues = [create_done_queue(0), create_done_queue(1)]

    # Pick two free ports for the distributed service.
    port0, port1 = [portpicker.pick_unused_port() for _ in range(2)]

    def run_worker(w):
        """Spawn worker task `w` as a child process, forwarding parent flags."""
        my_env = os.environ.copy()
        if not FLAGS.verbose:
            # Keep children quiet: hide GPUs and suppress TF C++ logging.
            my_env["CUDA_VISIBLE_DEVICES"] = ""
            my_env["TF_CPP_MIN_LOG_LEVEL"] = "2"
        if FLAGS.profile:
            my_env["LD_PRELOAD"] = "/usr/lib/libtcmalloc_and_profiler.so.4"
            my_env["CPUPROFILE"] = "/tmp/profile.out.%s" % (w)
        # Build an argv list (shell=False) rather than a " ".join()ed shell
        # string: flag values containing spaces no longer break the command
        # line, and there is no shell-injection surface. sys.executable
        # guarantees the same interpreter as the parent ("python" on PATH
        # may be a different one).
        cmd = [sys.executable] + sys.argv + [
            "--task=%d" % w, "--port0=%s" % port0, "--port1=%s" % port1]
        subprocess.Popen(cmd, stderr=subprocess.STDOUT, env=my_env)

    run_worker(0)
    run_worker(1)

    sess = tf.Session("grpc://%s:%s" % (host, port0), config=session_config())
    try:
        rate = run_benchmark(sess, *ops)
    finally:
        # Always bring the workers down, even if the benchmark raised —
        # otherwise the two child processes leak. Master (task 0) goes
        # last so it is still alive to deliver the other enqueue.
        if FLAGS.verbose:
            print("Killing workers.")
        sess.run(queues[1].enqueue(1))
        sess.run(queues[0].enqueue(1))
        sess.close()
    return rate
# (stray blog-page artifacts from the scrape, kept as a comment so the file
#  stays valid Python: "评论列表" = "comment list", "文章目录" = "article contents")