    # Module-level imports assumed by this method (not shown in this snippet):
    #   import os
    #   from twisted.internet.defer import DeferredLock
    def __init__(self,
list_path,
state_space_parameters,
hyper_parameters,
epsilon=None,
number_models=None):
        self.protocol = QConnection  # Twisted protocol class used for worker connections
        self.new_net_lock = DeferredLock()  # serializes concurrent net sampling/updates
        # Connected workers: connection name -> {'connection', 'net', 'iters_sampled'}
        self.clients = {}
        self.replay_columns = ['net',                 # network architecture string
                               'accuracy_best_val',   # best validation accuracy
                               'iter_best_val',       # iteration of best validation accuracy
                               'accuracy_last_val',   # final validation accuracy
                               'iter_last_val',       # iteration of final validation accuracy
                               'accuracy_best_test',  # best test accuracy
                               'accuracy_last_test',  # final test accuracy
                               'ix_q_value_update',   # iteration of the Q-value update
                               'epsilon',             # epsilon used (for epsilon-greedy)
                               'time_finished',       # UNIX timestamp of completion
                               'machine_run_on']      # worker machine that trained the net
        self.list_path = list_path
        self.replay_dictionary_path = os.path.join(list_path, 'replay_database.csv')
        # Restore any previously saved replay table and the Q training step counter
        self.replay_dictionary, self.q_training_step = self.load_replay()
        # With no explicit epsilon, follow the epsilon schedule from
        # state_space_parameters; otherwise run with the single fixed epsilon.
        # (`is None` avoids misclassifying a legitimate epsilon of 0.0.)
        self.schedule_or_single = epsilon is None
        if self.schedule_or_single:
            self.epsilon = state_space_parameters.epsilon_schedule[0][0]
            self.number_models = state_space_parameters.epsilon_schedule[0][1]
        else:
            self.epsilon = epsilon
            # Sentinel: effectively unlimited models when no cap is specified
            self.number_models = number_models if number_models is not None else 10000000000
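        # The schedule above is assumed (from the [0][0]/[0][1] indexing) to be
        # a list of [epsilon, count] pairs, e.g.
        #   epsilon_schedule = [[1.0, 1500], [0.9, 100], [0.8, 100], ...]
        # (hypothetical values): sample `count` models at each exploration rate.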
self.state_space_parameters = state_space_parameters
self.hyper_parameters = hyper_parameters
self.number_q_updates_per_train = 100
self.qlearner = self.load_qlearner()
self.check_reached_limit()
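
    # Usage sketch (an assumption -- this snippet does not show the enclosing
    # class): this __init__ appears to belong to a Twisted server factory,
    # called `QServer` here, whose QConnection protocol talks to worker clients.
    #
    #   from twisted.internet import reactor
    #   import state_space_parameters as ssp  # hypothetical module name
    #   import hyper_parameters as hp         # hypothetical module name
    #
    #   factory = QServer('results/', ssp, hp)  # epsilon=None -> schedule mode
    #   reactor.listenTCP(8000, factory)        # port choice is illustrative
    #   reactor.run()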