def main():
    '''
    Beginning on START_DATE, step forward hourly, training on the last
    hour's NLDAS FORA dataset with transformers in a 2-layer hierarchical
    ensemble and making out-of-training-sample predictions for the
    current hour. Makes a dill dump file for each hour run. Runs for
    NSTEPS hour steps.

    Returns the tuple
        (last_hour_data, this_hour_data, models, preds, models2, preds2)
    produced by the final hour step.

    NOTE(review): the return statement reads names that are only bound
    inside the loop, so NSTEPS is assumed to be at least 1.
    '''
    date = START_DATE
    add_hour = datetime.timedelta(hours=1)

    def get_file_name(date):
        # ISO timestamp with ':' and '-' swapped for '_', plus the
        # '.dill' extension, e.g. 2000_01_01T01_00_00.dill
        return date.isoformat().replace(':', '_').replace('-', '_') + '.dill'

    # Materialize the (name, object) pairs.  On Python 3 zip() is a
    # one-shot iterator, and the same init_func is handed to
    # train_model_on_models on every step, so any traversal after the
    # first would otherwise see an empty sequence.
    scalers = list(zip(
        ('MinMaxScaler', 'RobustScaler', 'StandardScaler', 'None'),
        (minmax, robust, standard, None)))
    estimators = list(zip(('LinearRegression', ),
                          (linear, )))
    init_func = partial(
        ensemble_init_func,
        pca=pca,
        scalers=scalers,
        n_components=n_components,
        estimators=estimators,
        preamble=preamble,
        log=log,
        minmax_bounds=minmax_bounds,
        summary='Flatten, Subset, Drop NaN Rows, Get Y Data, Difference X in Time')

    for _ in range(NSTEPS):
        # Sample the previous hour, advance the clock, then sample the
        # hour that predictions are made for.
        last_hour_data = sampler(date, X_time_steps=X_TIME_STEPS)
        date += add_hour
        this_hour_data = sampler(date, X_time_steps=X_TIME_STEPS)
        current_file = get_file_name(date)
        out = train_model_on_models(last_hour_data, this_hour_data, init_func)
        # Use a context manager so the dump file is flushed and closed
        # even if serialization raises.
        with open(current_file, 'wb') as dump_file:
            dill.dump(out, dump_file)
        print('Dumped to:', current_file)
        # The first two items of the result are not used here.
        _l2, _t2, models, preds, models2, preds2 = out
        layer_1_scores = [model._score for _, model in models]
        layer_2_scores = [model._score for _, model in models2]
        print('Scores in layer 1 models:', layer_1_scores)
        print('Scores in layer 2 models:', layer_2_scores)
    return last_hour_data, this_hour_data, models, preds, models2, preds2
# (Stray web-page navigation text: "Comment list" / "Table of contents" -- not part of the program.)