def compile(self, state_dim_values, lr=0.2, policy_rule="maxrand", init_value=None):
    """Build and initialize the table with every possible state.

    Args:
        state_dim_values: Tuple of arrays or lists, one per state
            dimension, each giving every possible value of that dimension.
            The table receives one entry per combination (Cartesian
            product), keyed by the tuple of per-dimension values.
        lr: Stored unchanged as ``self.lr`` (presumably the learning
            rate -- confirm against the code that reads it).
        policy_rule: Identifier passed to ``policies.get``; whatever that
            call returns is stored as ``self.policy_rule``.
        init_value: Initial entry for every state. When ``None``, a zero
            vector of length ``self.num_actions`` is used instead.
    """
    self.policy_rule = policies.get(policy_rule)
    if init_value is None:
        init_value = np.zeros(self.num_actions)
    self.init_value = init_value
    # np.array() copies its argument, so each state owns an independent
    # array and writing to one entry cannot alias another. product() is
    # iterated directly: the full list of keys is never materialized.
    self.table = {
        state: np.array(self.init_value)
        for state in itertools.product(*state_dim_values)
    }
    self.lr = lr
评论列表
文章目录