# NOTE: these imports are a minimal sketch of what this snippet needs;
# MODELS, DATASETS, evaluate_optimizer and calculate_performance are assumed
# to be defined elsewhere in the same benchmark module.
import json
from datetime import datetime

import numpy as np
from joblib import Parallel, delayed
from skopt import forest_minimize, gbrt_minimize, gp_minimize


def run(n_calls=32, n_runs=1, save_traces=True, n_jobs=1):
"""
Main function used to run the experiments.
Parameters
----------
* `n_calls`: int
Evaluation budget.
* `n_runs`: int
Number of times to repeat the optimization in order to average out noise.
* `save_traces`: bool
Whether or not to save data collected during optimization
* `n_jobs`: int
Number of different repeats of optimization to run in parallel.
"""
    surrogate_minimizers = [gbrt_minimize, forest_minimize, gp_minimize]
    selected_models = sorted(MODELS, key=lambda x: x.__name__)
    selected_datasets = DATASETS.keys()
    # all the parameter values and objectives collected during execution are stored in the dict below
    all_data = {}
    for model in selected_models:
        all_data[model] = {}
        for dataset in selected_datasets:
            # skip datasets that this model is not meant to be benchmarked on
            if not issubclass(model, DATASETS[dataset]):
                continue
            all_data[model][dataset] = {}
            for surrogate_minimizer in surrogate_minimizers:
                print(surrogate_minimizer.__name__, model.__name__, dataset)
                # draw a fresh random seed for every repeat so the runs are independent
                seeds = np.random.randint(0, 2**30, n_runs)
                raw_trace = Parallel(n_jobs=n_jobs)(
                    delayed(evaluate_optimizer)(
                        surrogate_minimizer, model, dataset, n_calls, seed
                    ) for seed in seeds
                )
                all_data[model][dataset][surrogate_minimizer.__name__] = raw_trace
    # convert the model keys to strings so that results can be saved as json
    all_data = {k.__name__: v for k, v in all_data.items()}
    # dump the recorded objective values as json
    if save_traces:
        with open(datetime.now().strftime("%m_%Y_%d_%H_%M_%S") + '.json', 'w') as f:
            json.dump(all_data, f)
    calculate_performance(all_data)
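
# A minimal usage sketch (assumption: this module is run as a script; the
# argument values below are illustrative, not the settings used upstream).
# After `run` finishes, the saved JSON is shaped roughly like
# {model_name: {dataset_name: {minimizer_name: raw_trace}}}.
if __name__ == "__main__":
    run(n_calls=64, n_runs=3, save_traces=True, n_jobs=3)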