def train_validate(self, df, validation_range, update_progress):
    """Train and validate a k-NN regressor on the df samples listed in validation_range.

    Every combination of ``sample_presentation``, ``exp_weight`` and ``k``
    found in ``self.model_params`` is evaluated.  For each combination and
    for each horizon in the module-level sequence ``h``, the indices in
    ``validation_range`` are visited in order: a fresh
    ``KNeighborsRegressor`` is fitted on the samples that end ``h`` steps
    before the validated sample and is then asked to predict that sample.
    Errors, hits and cumulative fit/predict times are collected per horizon
    and handed to ``summarize``.

    Args:
        df: Source data; passed to ``prepare_samples`` together with
            ``self.indicators_samples``.
        validation_range: Re-iterable collection of sample indices to
            validate on.  Each index is shifted by the ``first_sample``
            value returned by ``prepare_samples`` before being used as a
            row position.
        update_progress: Callable invoked after each parameter combination
            with the percentage (0-100) of combinations completed.

    Returns:
        tuple: ``(training_summary, presentable)`` where ``training_summary``
        is the concatenation of the ``summarize`` results for every
        parameter combination (an empty ``DataFrame`` if there were none)
        and ``presentable`` is
        ``make_presentable(training_summary, self.summary_name)``.

    Raises:
        ValueError: If a validation index leaves no training samples at
            least ``h`` steps before it.
    """
    first_sample, samples, labels = prepare_samples(df, self.indicators_samples)
    model_params = self.model_params
    n_horizons = len(h)

    # progress bar parameters
    total_steps = (len(model_params['sample_presentation'])
                   * len(model_params['exp_weight'])
                   * len(model_params['k']))
    completed_steps = 0

    # Summaries are gathered here and concatenated once at the end:
    # DataFrame.append was removed in pandas 2.0 and copied the whole
    # accumulated frame on every call.
    summaries = []

    # loop over model parameters
    for sample_presentation in model_params['sample_presentation']:
        presented_samples, presented_labels, normalizer = set_presentation(
            samples, labels, sample_presentation, self.indicators_samples['Daily'])
        for exp_weight in model_params['exp_weight']:
            weighted_samples = apply_exp_weights(presented_samples, exp_weight)
            for k in model_params['k']:
                # One slot per horizon; entries stay 0 / [] when
                # validation_range is empty.
                model = [0] * n_horizons
                total_train_time = [0] * n_horizons
                total_test_time = [0] * n_horizons
                error_list = [[] for _ in range(n_horizons)]
                relative_error_list = [[] for _ in range(n_horizons)]
                hit_list = [[] for _ in range(n_horizons)]
                params = (sample_presentation, exp_weight, k)

                # model training and validation core
                for h_index, horizon in enumerate(h):
                    horizon_labels = presented_labels[h_index]
                    last_position = len(horizon_labels) - 1
                    for index in validation_range:
                        i = index - first_sample
                        # Training must stop `horizon` steps before the
                        # validated sample.
                        train_end = i - horizon + 1
                        if train_end <= 0:
                            # A negative slice bound would wrap around and
                            # train on samples *after* the validated one.
                            raise ValueError(
                                "validation index {0} leaves no training samples "
                                "for horizon {1}".format(index, horizon))
                        x_train = weighted_samples[:train_end, :]
                        x_validate = weighted_samples[i, :]
                        y_train = horizon_labels[:train_end]
                        y_validate = horizon_labels[i]

                        # train a separate model for each horizon
                        t1 = time.time()
                        model[h_index] = KNeighborsRegressor(n_neighbors=k)
                        model[h_index].fit(x_train, y_train)
                        t2 = time.time()
                        train_time = t2 - t1

                        # test
                        y_predict = model[h_index].predict(x_validate.reshape(1, -1))
                        test_time = time.time() - t2

                        # append new results; errors are measured after the
                        # presentation transform has been undone
                        y_validate_absolute = remove_presentation(
                            y_validate, normalizer[i], sample_presentation)
                        y_predict_absolute = remove_presentation(
                            y_predict, normalizer[i], sample_presentation)
                        error = y_validate_absolute - y_predict_absolute
                        error_list[h_index].append(error)
                        relative_error_list[h_index].append(error / y_validate_absolute)
                        # Hit: prediction and actual label lie on the same
                        # side of the sample's last feature value.
                        last_feature = x_validate[-1]
                        hit_list[h_index].append(
                            (y_validate - last_feature) * (y_predict - last_feature) > 0)
                        total_train_time[h_index] += train_time
                        total_test_time[h_index] += test_time

                        if i == last_position:
                            # very last training point: refit on everything
                            # up to and including it, so the saved model has
                            # seen the last training opportunity
                            model[h_index].fit(weighted_samples[:i + 1, :],
                                               horizon_labels[:i + 1])
                            break

                completed_steps += 1
                update_progress(100.0 * completed_steps / total_steps)

                # save last trained model, and add to training summary
                summary = summarize(self, model, error_list, relative_error_list,
                                    hit_list, params, total_train_time, total_test_time)
                # NOTE(review): summarize is defined elsewhere and is assumed
                # to return a DataFrame; a Series is turned into a single
                # row, as DataFrame.append used to do.
                if isinstance(summary, pd.Series):
                    summary = summary.to_frame().T
                summaries.append(summary)

    training_summary = pd.concat(summaries) if summaries else pd.DataFrame()
    return training_summary, make_presentable(training_summary, self.summary_name)
评论列表
文章目录