# LongerExplorationPolicy.py source file - Python code snippet

def sampleUniformActionSequence(self):
        """Sample a randomly ordered sequence of ``self._l`` actions.

        Every *unordered* sequence (multiset) of ``self._l`` candidate actions
        is equally likely; the chosen one is then shuffled with
        ``self.random_state`` before being returned.

        Candidate actions depend on ``self.n_actions``:

        * if it is an ``int``, the candidates are ``0 .. self.n_actions - 1``;
        * otherwise it is iterated as a collection of bounds, one entry per
          action dimension, where entry ``[0]`` and entry ``[1]`` are passed
          as the two arguments of ``self.random_state.uniform``.  Three values
          are drawn per dimension and the candidates are all tuples of the
          Cartesian product of those values.  The stated goal is that, over
          the exploration steps, the sum of the actions spans the range of
          possibilities quite uniformly.

        Returns:
            list: ``self._l`` actions (ints in the discrete case, tuples of
            sampled values otherwise) in random order.

        Raises:
            ValueError: if ``self._l`` is negative, or if there is no
                candidate action while ``self._l`` is positive.
        """
        if isinstance(self.n_actions, int):
            actions_list = range(self.n_actions)
        else:
            # Number of random values drawn between the bounds of each
            # action dimension.
            N = 3
            possible_actions = [
                [self.random_state.uniform(bounds[0], bounds[1]) for _ in range(N)]
                for bounds in self.n_actions
            ]
            actions_list = list(itertools.product(*possible_actions))

        # Creating the iterator validates self._l (a negative length raises
        # ValueError) without generating any combination yet.
        candidates = itertools.combinations_with_replacement(actions_list, self._l)

        # Number of multisets of size l over n items: C(n + l - 1, l).
        # The running product stays an exact integer at every step, and it
        # evaluates to 0 when there are no actions but l > 0, and to 1 when
        # l == 0, matching what the iterator would yield.
        n_candidates = len(actions_list)
        n_sequences = 1
        for k in range(1, self._l + 1):
            n_sequences = n_sequences * (n_candidates - 1 + k) // k

        # Draw the index exactly as if every combination had been listed,
        # then walk the lazy iterator up to it instead of materializing the
        # whole (combinatorially large) list in memory.
        index_pick = self.random_state.randint(0, n_sequences)
        sequence = list(next(itertools.islice(candidates, int(index_pick), None)))
        self.random_state.shuffle(sequence)

        return sequence