def choose_action(self, context):
    """Randomly pick an action for ``context``.

    The sampling weights are whatever ``softmax(self.Q[context], self.beta)``
    returns; one element of ``range(self.n)`` is then drawn with those
    weights through ``np.random.choice`` (global NumPy random state).

    NOTE(review): ``softmax`` lives outside this block. Presumably it
    returns ``self.n`` probabilities summing to 1 and ``self.beta`` is its
    temperature-like parameter -- confirm against its definition.

    Args:
        context: Key or index used to select the entry of ``self.Q``.

    Returns:
        The sampled element of ``range(self.n)``.
    """
    weights = softmax(self.Q[context], self.beta)
    return np.random.choice(range(self.n), p=weights)