def calc_reward(self, action=0, state=None, **kw):
    """Calculate the bounded reward for taking ``action`` in ``state``.

    The raw reward is ``-0.1*V - 2e4*eps1**2 - 2e3*eps2**2 + 1e3*E``. It is
    then limited to ``[-self.reward_bound, self.reward_bound]``.

    Args:
        action: Key/index into ``self.eps_values_for_actions``; the selected
            entry must unpack into the pair ``(eps1, eps2)``.
        state: Six values ordered ``(T1, T2, T1s, T2s, V, E)``, given as any
            sequence or array. When ``None``, ``self.observe()`` supplies the
            state. When ``self.logspace`` is true the values are treated as
            base-10 logarithms and are exponentiated before use.
        **kw: Accepted for call compatibility and ignored.

    Returns:
        The reward limited to ``+/- self.reward_bound``. A NaN reward is
        mapped to ``-self.reward_bound``.

    Raises:
        ValueError: If the state does not unpack into exactly six values.
    """
    # NOTE(review): eps1/eps2 look like per-action treatment strengths and
    # V/E like the quantities to suppress/encourage -- confirm against the
    # model definition; only the formula below is established here.
    eps1, eps2 = self.eps_values_for_actions[action]

    if state is None:
        state = self.observe()

    if self.logspace:
        # Coerce first: ``10 ** state`` raises TypeError for a plain list or
        # tuple, and an integer array fails on negative exponents.
        state = 10.0 ** np.asarray(state, dtype=float)

    # Only V and E enter the reward; the six-way unpack is kept so that a
    # state of the wrong length still fails loudly.
    _, _, _, _, V, E = state

    # the reward function penalizes treatment because of side-effects
    reward = -0.1*V - 2e4*eps1**2 - 2e3*eps2**2 + 1e3*E

    # Constrain reward to be within specified range. NaN compares false with
    # everything, so it needs its own branch before clamping.
    bound = self.reward_bound
    if np.isnan(reward):
        return -bound
    return min(max(reward, -bound), bound)
# Comment list
# Article table of contents