# ensemble.py source file - Python code snippet

def parallel_fit(self, X, y, client_token=None, est_timeout=None):
    """Fit each configured estimator in its own subprocess and collect them.

    X and y are pickled to ``self.X_file.name`` and ``self.y_file.name``.
    One worker process running the ``sklearn_fit`` script is then launched
    per entry of ``self.estimator_build_args``. Once all workers have
    finished, the estimator pickled by every worker that exited with
    status 0 is loaded and appended to ``self.estimator_ensemble``.

    Also sets ``self.n_outputs_`` to 1 and ``self.classes_`` to the unique
    values found in ``y``.

    Parameters
    ----------
    X, y
        Training data and labels. Both must be picklable.
    client_token : str, optional
        Forwarded to each worker as ``--client_token``. When ``None`` the
        flag is omitted instead of putting ``None`` into the argument list
        (which makes ``Popen`` raise ``TypeError``).
        NOTE(review): the worker is then presumably responsible for finding
        credentials on its own or failing -- confirm against sklearn_fit.
    est_timeout : number, optional
        Forwarded to each worker as ``--opt_timeout``. The surrounding
        ``timeout`` command is given 10 more than this value so that a
        worker that never completes is still terminated. Defaults to
        ``int(1e6)``.

    Returns
    -------
    None
    """
    # Local import keeps this method self-contained; sys is only needed to
    # locate the interpreter that is running this process.
    import sys

    self.n_outputs_ = 1
    self.classes_ = np.array(np.unique(check_array(y, ensure_2d=False,
                                                   allow_nd=True, dtype=None)))

    if est_timeout is None:
        est_timeout = int(1e6)

    # Store X and y data for workers to use
    with open(self.X_file.name, 'wb') as outfile:
        pickle.dump(X, outfile, pickle.HIGHEST_PROTOCOL)
    with open(self.y_file.name, 'wb') as outfile:
        pickle.dump(y, outfile, pickle.HIGHEST_PROTOCOL)

    # Run the workers with the same interpreter as this process so they see
    # the same installed packages and a compatible pickle implementation;
    # fall back to "python" when the interpreter path is unavailable.
    interpreter = sys.executable or "python"

    # Only pass the token flag when a token was actually supplied.
    token_args = []
    if client_token is not None:
        token_args = ["--client_token", client_token]

    sigopt_procs = []
    try:
        for build_args in self.estimator_build_args:
            # Run a separate python process for each estimator. Each process
            # is wrapped in the `timeout` command to capture the case where
            # a single observation never completes.
            command = [
                "timeout", str(est_timeout + 10),
                interpreter, sklearn_fit.__file__,
                "--opt_timeout", str(est_timeout),
                "--estimator", build_args['estimator'],
                "--X_file", build_args['X_file'],
                "--y_file", build_args['y_file'],
            ]
            command.extend(token_args)
            command.extend(["--output_file", build_args['output_file']])
            sigopt_procs.append(Popen(command))
        exit_codes = [proc.wait() for proc in sigopt_procs]
    finally:
        # If launching or waiting was interrupted (failed spawn,
        # KeyboardInterrupt, ...), do not leave already-started workers
        # running. Best effort: ask them to terminate, without blocking.
        for proc in sigopt_procs:
            if proc.poll() is None:
                proc.terminate()

    # Load estimators back into memory, skipping workers that errored or
    # timed out (non-zero exit status).
    # NOTE: pickle.load executes arbitrary code from the file; the output
    # paths must only ever point at files written by the workers above.
    for exit_code, est_arg in zip(exit_codes, self.estimator_build_args):
        if exit_code != 0:
            continue
        with open(est_arg['output_file'], 'rb') as infile:
            self.estimator_ensemble.append(pickle.load(infile))