def predict(st, norm, bounds):
    """Return the policy network's output for one state, rounded to 4 decimals.

    The last column of ``st`` is passed through ``log(1 + x)``, clipped to
    just inside ``(bounds[0], bounds[2])``, rescaled to (0, 1) and mapped
    through ``logit``.  The first entry of ``st`` followed by columns 1, 2, 3
    and the transformed last column (flattened row by row) forms a 61-value
    feature vector, which is standardised with ``norm`` and passed to
    ``policy_network``.

    ``st`` itself is left unmodified: the transform is applied to a private
    float copy, so the same array can safely be passed in again.

    Args:
        st: 2-D array.  Filling the 61-value feature vector (1 + 4 per row)
            only succeeds when it has exactly 15 rows.
        norm: Indexable pair; ``norm[0]`` is subtracted from the features and
            the result is divided by ``norm[1]`` (presumably mean and
            standard deviation -- confirm against the training code).
        bounds: Indexable; ``bounds[0]`` and ``bounds[2]`` are used as the
            lower and upper limits of the log-transformed last column.

    Returns:
        ``policy_network(X)[0, :]`` rounded to 4 decimal places.
    """
    # Work on a float copy: writing the transformed column back into the
    # caller's array would make a second call with the same ``st`` apply the
    # transform twice, and would truncate the values for integer arrays.
    work = np.array(st, dtype=float)

    lower = bounds[0]
    # NOTE(review): the upper limit is read from index 2, not 1 -- confirm
    # the layout of ``bounds`` with the caller.
    upper = bounds[2]
    eps = 1e-5

    rew = np.log(1 + work[:, -1:])
    # Stay strictly inside (lower, upper) so the rescaled value never hits
    # 0 or 1, where logit is infinite.
    rew = np.clip(rew, lower + eps, upper - eps)
    # ``logit`` comes from the enclosing module (presumably
    # scipy.special.logit -- confirm).
    work[:, -1:] = logit((rew - lower) / (upper - lower))

    # The fixed 61-wide buffer doubles as a shape check on ``st``.
    state = np.zeros((1, 61))
    state[0, :] = np.hstack((work[0, 0], work[:, [1, 2, 3, -1]].ravel()))

    X = (state - norm[0]) / norm[1]
    return np.round(policy_network(X)[0, :], 4)
评论列表
文章目录