def softmax_policy(self) -> int:
    """Sample one action index at random, weighted by ``self.softmax``.

    The candidates are the integers ``0 .. self.action_size - 1``; the
    entry of ``self.softmax`` at position ``i`` is used as the relative
    weight of action ``i``.

    NOTE(review): presumably ``self.softmax`` holds the current softmax
    probabilities over actions and has exactly ``self.action_size``
    entries -- confirm where the attribute is populated.

    Returns:
        The sampled action index.
    """
    # ``range`` is already a sequence, so it can be sampled directly
    # without materialising a list on every call.  ``choices`` returns a
    # list of length k (default 1); unpack its single element.
    return choices(range(self.action_size), weights=self.softmax)[0]