def parallel_fit(self, X, y, client_token=None, est_timeout=None):
    """Fit each configured estimator in its own worker process.

    Sets ``self.n_outputs_`` to 1 and ``self.classes_`` to the unique
    values of ``y``, pickles ``X`` and ``y`` to ``self.X_file.name`` and
    ``self.y_file.name``, then starts one ``sklearn_fit`` subprocess per
    entry of ``self.estimator_build_args``. After every subprocess has
    finished, the pickled result of each one that exited with status 0 is
    loaded and appended to ``self.estimator_ensemble``. Workers that exit
    with a non-zero status (errored or timed out) are skipped.

    Parameters
    ----------
    X, y
        Training data and targets; pickled as-is for the workers.
    client_token : str
        Value passed to each worker as ``--client_token``. Although the
        default is ``None``, a value is required whenever at least one
        estimator is launched.
    est_timeout : int, optional
        Value passed to each worker as ``--opt_timeout``. Defaults to
        ``int(1e6)``. The external ``timeout`` wrapper is given
        ``est_timeout + 10``.

    Raises
    ------
    TypeError
        If ``client_token`` is ``None`` when a worker is about to start.
    """
    # Function-scope import: only needed to locate the running interpreter.
    import sys

    self.n_outputs_ = 1
    self.classes_ = np.array(np.unique(check_array(y, ensure_2d=False,
                                                   allow_nd=True, dtype=None)))
    if est_timeout is None:
        est_timeout = int(1e6)

    # Store X and y data for workers to use
    with open(self.X_file.name, 'wb') as outfile:
        pickle.dump(X, outfile, pickle.HIGHEST_PROTOCOL)
    with open(self.y_file.name, 'wb') as outfile:
        pickle.dump(y, outfile, pickle.HIGHEST_PROTOCOL)

    # Run the workers under the same interpreter as this process so they can
    # read the HIGHEST_PROTOCOL pickles written above; a bare "python" from
    # PATH may be missing or a different version. Fall back to "python" only
    # when the interpreter path is unavailable.
    interpreter = sys.executable or "python"

    sigopt_procs = []
    for build_args in self.estimator_build_args:
        # Popen rejects None inside its argument list with an opaque
        # TypeError; fail at the same point with a message that names the
        # actual problem.
        if client_token is None:
            raise TypeError(
                "client_token must be provided (got None); it is passed "
                "to each worker process as --client_token"
            )
        # run a separate python process for each estimator with timeout
        # these processes are wrapped in timeout command to capture case
        # where a single observation never completes
        # NOTE(review): the extra 10 presumably lets the worker's own
        # --opt_timeout trigger before the wrapper kills it -- confirm.
        sigopt_procs.append(Popen([
            "timeout", str(est_timeout + 10), interpreter,
            sklearn_fit.__file__,
            "--opt_timeout", str(est_timeout),
            "--estimator", build_args['estimator'],
            "--X_file", build_args['X_file'],
            "--y_file", build_args['y_file'],
            "--client_token", client_token,
            "--output_file", build_args['output_file'],
        ]))

    # Block until every worker has exited; exit codes are in launch order.
    exit_codes = [proc.wait() for proc in sigopt_procs]

    # remove estimators that errored or timed out
    valid_est_args = [
        est_args
        for exit_code, est_args in zip(exit_codes, self.estimator_build_args)
        if exit_code == 0
    ]

    # load valid estimators back into memory
    # NOTE: pickle.load executes whatever the file describes; these files
    # are expected to be the ones written by the workers started above.
    for est_arg in valid_est_args:
        with open(est_arg['output_file'], 'rb') as infile:
            clf = pickle.load(infile)
            self.estimator_ensemble.append(clf)
评论列表
文章目录