def _run_job(name, config, gpu=None, prog_args=None, background=False):
    """Run an experiment's program and track its lifecycle in the ``.em`` shelf.

    The command is ``config['experiment']['prog']`` followed by
    ``config['experiment']['prog_args']`` and then ``prog_args``. It is run
    with the directory returned by ``_expath(name)`` as its working directory
    and is handed this process's stdin/stdout/stderr.

    While the job runs, ``emdb[name]`` holds ``started``, ``status``
    (``'running'``), ``pid``, ``hostname`` and, when a GPU was requested,
    ``gpu``. When the job ends, ``status`` becomes ``'completed'`` (exit code
    0), ``'error'`` (non-zero exit code) or ``'interrupted'`` (a
    ``KeyboardInterrupt``, which is swallowed here); ``pid`` is removed and
    ``ended`` is set.

    Args:
        name: Experiment name; also the key of the record in the shelf.
        config: Mapping with an ``'experiment'`` entry providing ``'prog'``
            and ``'prog_args'`` (a list).
        gpu: Value exported to the job as ``CUDA_VISIBLE_DEVICES``. ``None``,
            ``False`` and ``''`` leave the variable untouched; integer ``0``
            is accepted as a device index.
        prog_args: Optional list of extra arguments appended to the command.
        background: When true, the job is run from inside a
            ``daemon.DaemonContext`` rooted at the current directory.

    Raises:
        OSError: Propagated from ``subprocess.Popen`` when the program cannot
            be started; no shelf record is written or modified in that case.
    """
    import socket
    import subprocess

    exper_dir = _expath(name)

    exper_config = config['experiment']
    runem_cmd = ([exper_config['prog']] +
                 exper_config['prog_args'] +
                 (prog_args or []))

    # Plain truthiness would drop GPU index 0 when it is passed as an int.
    use_gpu = bool(gpu) or (gpu == 0 and gpu is not False)

    # Work on a copy: writing into os.environ itself would leak the GPU
    # selection into this process and into every job launched afterwards.
    env = os.environ.copy()
    if use_gpu:
        # Environment values must be strings.
        env['CUDA_VISIBLE_DEVICES'] = str(gpu)

    def _do_run_job():
        # The shelf entry is only updated once this run has created it.
        # Otherwise a failure to start the program would surface as a
        # KeyError from the cleanup code (hiding the real error) or would
        # overwrite fields of a record left by a previous run.
        recorded = False
        try:
            job = subprocess.Popen(runem_cmd, cwd=exper_dir, env=env,
                                   stdin=sys.stdin, stdout=sys.stdout,
                                   stderr=sys.stderr)
            record = {
                'started': _tstamp(),
                'status': 'running',
                'pid': job.pid,
                'hostname': socket.getfqdn(),
            }
            if use_gpu:
                record['gpu'] = gpu
            with shelve.open('.em', writeback=True) as emdb:
                emdb[name] = record
            recorded = True

            job.wait()

            status = 'completed' if job.returncode == 0 else 'error'
            with shelve.open('.em', writeback=True) as emdb:
                emdb[name]['status'] = status
        except KeyboardInterrupt:
            # Deliberately swallowed: an interrupt is recorded as a job
            # outcome rather than propagated to the caller.
            if recorded:
                with shelve.open('.em', writeback=True) as emdb:
                    emdb[name]['status'] = 'interrupted'
        finally:
            if recorded:
                with shelve.open('.em', writeback=True) as emdb:
                    emdb[name].pop('pid', None)
                    emdb[name]['ended'] = _tstamp()

    if background:
        # Imported lazily so that foreground runs do not require the
        # third-party python-daemon package to be installed.
        import daemon

        # Pin the working directory so the relative '.em' shelf path still
        # resolves to the same file from inside the daemon context.
        curdir = osp.abspath(os.curdir)
        with daemon.DaemonContext(working_directory=curdir):
            _do_run_job()
    else:
        _do_run_job()
评论列表
文章目录