def _pnl_pos(self, e, s, a, pnl, inputs):
    '''
    Return the PnL-based reward reduced by position and duration penalties.

    The base reward is whatever ``self._pnl`` returns for the same
    arguments (per the original documentation: the PnL from the last step
    marked to the mid-price of the instruments traded). When the agent has
    a logged action, two penalties are subtracted from it:

    * 0.02 times the absolute position held in the main instrument
    * 0.30 times the absolute value of the rounded ``duration`` entry of
      the agent's ``log_info``

    :param e: Environment object. Environment where the agent operates
    :param s: dictionary. Forwarded unchanged to ``self._pnl``
    :param a: Agent object. The agent that will perform the action
    :param pnl: float. The current pnl of the agent
    :param inputs: dictionary. The inputs from environment to the agent
    :return: the base reward, minus the penalties when ``a.logged_action``
        is truthy
    '''
    base_reward = self._pnl(e, s, a, pnl, inputs)
    # NOTE: ``s_main_intrument`` (sic) is the attribute name exposed by the
    # environment object; it is read before the guard, as in the original.
    main_instrument = e.s_main_intrument
    if a.logged_action:
        # cost of carrying inventory in the main instrument
        position_cost = (
            abs(e.agent_states[a][main_instrument]['Position']) * 0.02
        )
        # cost proportional to the rounded duration logged by the agent
        duration_cost = abs(np.around(a.log_info['duration'])) * 0.30
        return base_reward - (position_cost + duration_cost)
    # no logged action: the plain PnL reward is returned untouched
    return base_reward
评论列表
文章目录