def V(self, eta):
    """Return a vector of length K with the expected value of R for each action a.

    The expectation is over x sampled from p(x|a).

    The result is computed as ``A2T . (1 / (A . eta))``. Zero entries of
    ``A . eta`` produce infinite reciprocals; these are replaced by the
    largest finite float so that a zero weight in ``self.A2T`` multiplies
    them to 0 instead of NaN.

    Args:
        eta: vector that ``self.A`` is multiplied with; presumably the
            sampling distribution over actions -- TODO confirm with callers.

    Returns:
        The product of ``self.A2T`` with the sanitised reciprocal vector.
    """
    # Division by zero is expected here and handled on the next line, so
    # silence it locally rather than depending on the global NumPy error
    # state (which may warn, or raise under np.seterr(divide='raise')).
    with np.errstate(divide='ignore'):
        u = np.true_divide(1.0, np.dot(self.A, eta))
    # Infinities become very large finite numbers (and NaN becomes 0) so
    # that multiplying by 0 gives 0.
    u = np.nan_to_num(u)
    return np.dot(self.A2T, u)
评论列表
文章目录