def policy_gradient_loss(l_sampled, l_predicted):
    """Return the categorical cross-entropy of the arguments, scaled by ``A``.

    ``A`` and ``categorical_crossentropy`` are not defined in this function;
    both are looked up in the enclosing/module scope when it is called.
    NOTE(review): ``A`` is presumably the advantage input of the model and
    ``categorical_crossentropy`` the Keras loss of that name -- confirm
    against the surrounding file.

    Args:
        l_sampled: First argument forwarded to ``categorical_crossentropy``
            (presumably the sampled actions used as targets -- confirm).
        l_predicted: Second argument forwarded to ``categorical_crossentropy``
            (presumably the predicted action probabilities -- confirm).

    Returns:
        The product of ``A`` and the cross-entropy after a new axis has been
        inserted at position 1 of the cross-entropy result.
    """
    cross_entropy = categorical_crossentropy(l_sampled, l_predicted)
    # Insert an axis at position 1 before multiplying by A; presumably this
    # turns a per-sample (batch,) loss into (batch, 1) so it lines up with a
    # (batch, 1) advantage instead of broadcasting -- TODO confirm shapes.
    return A * cross_entropy[:, np.newaxis]
# inputs to the model are observation and advantage,
# outputs are action probabilities and baseline
评论列表
文章目录