def select_action(self, state):
    """Sample an action from the Gaussian policy given by ``self.model``.

    ``self.model`` is called on ``state`` (wrapped in a ``Variable`` and
    moved to the GPU) and must return two outputs: the mean ``mu`` and a raw
    variance output. The raw variance is passed through softplus so that
    ``sigma_sq`` is strictly positive.

    Args:
        state: Input handed to ``Variable(...)`` and then to the model;
            presumably a CPU tensor holding the current observation --
            TODO confirm against the caller.

    Returns:
        A tuple ``(action, log_prob, entropy)``:

        * ``action`` -- the sampled action, taken via ``.data`` so no
          gradient flows through the sample itself.
        * ``log_prob`` -- ``log`` of the value returned by the file-level
          ``normal(action, mu, sigma_sq)`` helper (presumably the Gaussian
          density of ``action`` -- verify against that helper).
        * ``entropy`` -- ``-0.5 * (log(2 * pi * sigma_sq) + 1)``
          element-wise, i.e. the negated Gaussian differential entropy.
    """
    mu, sigma_sq = self.model(Variable(state).cuda())
    sigma_sq = F.softplus(sigma_sq)

    # Draw standard-normal noise, then scale by sigma and shift by mu.
    eps = torch.randn(mu.size())
    action = (mu + sigma_sq.sqrt() * Variable(eps).cuda()).data

    # Calculate the probability of the sampled action and its log.
    prob = normal(action, mu, sigma_sq)
    log_prob = prob.log()

    # The differential entropy of N(mu, sigma^2) is
    # 0.5 * (log(2 * pi * sigma^2) + 1): the 2*pi factor multiplies the
    # variance inside the log. The previous code added them instead
    # (log(sigma_sq + 2*pi)), which is not the entropy of the policy.
    # NOTE(review): the leading minus sign is kept from the original, so
    # this is the *negated* entropy -- confirm the caller's loss term
    # expects that sign.
    entropy = -0.5 * ((2 * pi.expand_as(sigma_sq) * sigma_sq).log() + 1)
    return action, log_prob, entropy
reinforce_continuous.py 文件源码
python
阅读 22
收藏 0
点赞 0
评论 0
评论列表
文章目录