def perform_action(self, action, perturb_params=False, p_lambda1=0, p_lambda2=0, p_k1=0,
                   p_k2=0, p_f=0, p_m1=0, p_m2=0, p_lambdaE=0, p_bE=0, p_Kb=0, p_d_E=0, p_Kd=0, **kw):
    """Perform the specified action and update the environment.

    Advances the simulated dynamics by one step of length ``self.dt`` using a
    stiff ODE integrator (VODE with the BDF method), then computes the reward
    for the chosen action.

    Arguments:
    action -- action to be taken; index into ``self.eps_values_for_actions``

    Keyword Arguments:
    perturb_params -- boolean indicating whether to perturb dynamics (default: False)
    p_lambda1 -- hidden parameter (default: 0)
    p_lambda2 -- hidden parameter (default: 0)
    p_k1 -- hidden parameter (default: 0)
    p_k2 -- hidden parameter (default: 0)
    p_f -- hidden parameter (default: 0)
    p_m1 -- hidden parameter (default: 0)
    p_m2 -- hidden parameter (default: 0)
    p_lambdaE -- hidden parameter (default: 0)
    p_bE -- hidden parameter (default: 0)
    p_Kb -- hidden parameter (default: 0)
    p_d_E -- hidden parameter (default: 0)
    p_Kd -- hidden parameter (default: 0)

    Returns:
    (reward, observation) -- reward from ``self.calc_reward`` and the
    post-step observation from ``self.observe``
    """
    self.t += 1
    self.action = action
    # Drug-efficacy pair associated with the chosen action.
    eps1, eps2 = self.eps_values_for_actions[action]
    # BDF via VODE handles the stiff dynamics; generous step budget.
    r = ode(self.model_derivatives).set_integrator('vode', nsteps=10000, method='bdf')
    t0 = 0
    deriv_args = (eps1, eps2, perturb_params, p_lambda1, p_lambda2, p_k1, p_k2,
                  p_f, p_m1, p_m2, p_lambdaE, p_bE, p_Kb, p_d_E, p_Kd)
    # NOTE(review): the tuple is passed as a SINGLE extra argument
    # (set_f_params(deriv_args), not *deriv_args), so model_derivatives is
    # expected to receive and unpack it as one parameter — confirm against
    # its definition.
    r.set_initial_value(self.state, t0).set_f_params(deriv_args)
    self.state = r.integrate(self.dt)
    reward = self.calc_reward(action=action)
    return reward, self.observe()