# nldas_soil_moisture_ml.py -- Python source code snippet

def main():
    '''
    Step forward hourly from START_DATE for NSTEPS hour steps, training
    a 2-layer hierarchical ensemble (with transformers) on NLDAS FORA data.

    At each step the ensemble is trained on the last hour's sample and
    makes out-of-training-sample predictions for the current hour.  The
    full output of train_model_on_models is dumped with dill to one file
    per hour, named after the current hour's ISO timestamp with ':' and
    '-' replaced by '_' plus a '.dill' suffix.  The file name has no
    directory part, so it is written relative to the working directory.

    Returns:
        The tuple (last_hour_data, this_hour_data, models, preds,
        models2, preds2) from the final step.  Every element is None
        when NSTEPS is zero or negative, because no step was run.
    '''
    def dump_file_name(when):
        # ISO timestamps contain ':' and '-'; swap them for '_' to build
        # the dump file name.
        stamp = when.isoformat().replace(':', '_').replace('-', '_')
        return stamp + '.dill'

    one_hour = datetime.timedelta(hours=1)

    # Materialize the (name, object) pairs as lists.  On Python 3 a bare
    # zip() is a one-shot iterator, and init_func is handed to
    # train_model_on_models on every step, so an iterator would be empty
    # after its first consumption.
    scalers = list(zip(
        ('MinMaxScaler', 'RobustScaler', 'StandardScaler', 'None'),
        (minmax, robust, standard, None)))
    estimators = list(zip(('LinearRegression', ),
                          (linear, )))
    init_func = partial(
        ensemble_init_func,
        pca=pca,
        scalers=scalers,
        n_components=n_components,
        estimators=estimators,
        preamble=preamble,
        log=log,
        minmax_bounds=minmax_bounds,
        summary='Flatten, Subset, Drop NaN Rows, Get Y Data, Difference X in Time')

    # Pre-bind the returned names so a run with NSTEPS <= 0 returns Nones
    # instead of raising UnboundLocalError at the return statement.
    last_hour_data = this_hour_data = None
    models = preds = models2 = preds2 = None

    date = START_DATE
    # NOTE(review): each hour is sampled twice (as "this hour", then again
    # as "last hour" on the next step).  Left unchanged because it is not
    # visible from here whether sampler is deterministic or whether the
    # data is modified during training -- confirm before reusing the sample.
    for _ in range(NSTEPS):
        last_hour_data = sampler(date, X_time_steps=X_TIME_STEPS)
        date += one_hour
        this_hour_data = sampler(date, X_time_steps=X_TIME_STEPS)
        current_file = dump_file_name(date)
        out = train_model_on_models(last_hour_data, this_hour_data, init_func)
        # Close the dump file deterministically rather than leaving the
        # handle to the garbage collector.
        with open(current_file, 'wb') as dump_handle:
            dill.dump(out, dump_handle)
        print('Dumped to:', current_file)
        # The first two elements of the output are not used here.
        _, _, models, preds, models2, preds2 = out
        layer_1_scores = [model._score for _, model in models]
        layer_2_scores = [model._score for _, model in models2]
        print('Scores in layer 1 models:', layer_1_scores)
        print('Scores in layer 2 models:', layer_2_scores)
    return last_hour_data, this_hour_data, models, preds, models2, preds2