def __init__(self, policy, mdp_info, params, features=None):
    """
    Set up a GPOMDP instance.

    All arguments are handed unchanged to the parent constructor; this
    method only adds empty bookkeeping attributes on top of whatever the
    parent initialises.

    Args:
        policy: forwarded to the parent constructor;
        mdp_info: forwarded to the parent constructor;
        params: forwarded to the parent constructor;
        features (None): forwarded to the parent constructor.

    """
    # The name is assigned before delegating to the parent, exactly as
    # in the original ordering (the parent may read it -- not visible
    # from here).
    self.__name__ = 'GPOMDP'
    super(GPOMDP, self).__init__(policy, mdp_info, params, features)

    # No running sum exists until one is assigned elsewhere.
    self.sum_d_log_pi = None

    # Empty containers, created in the same order as before.
    # NOTE(review): the names suggest per-step values and their
    # per-episode ("_ep") counterparts, plus numerator/denominator terms
    # of a baseline -- confirm against the methods that fill them.
    self.list_sum_d_log_pi, self.list_sum_d_log_pi_ep = [], []
    self.list_reward, self.list_reward_ep = [], []
    self.baseline_num, self.baseline_den = [], []

    self.step_count = 0

    # Silence NumPy's divide-by-zero and invalid-operation floating
    # point warnings.
    # NOTE(review): np.seterr is not scoped to this instance; the
    # setting stays in effect after the constructor returns.
    np.seterr(invalid='ignore', divide='ignore')
评论列表
文章目录