def sampleUniformActionSequence(self):
    """Draw a random action sequence of length ``self._l``.

    Every unordered sequence (multiset) of candidate actions has the same
    probability of being picked; the picked multiset is then shuffled so
    that the order of its actions is random as well.

    The candidate actions depend on ``self.n_actions``:

    * an ``int``: the discrete actions ``0 .. n_actions - 1``;
    * otherwise: a sequence of ``(low, high)`` bounds, one entry per action
      dimension. A few values are drawn uniformly inside each dimension
      and the candidates are the Cartesian product of those draws, so the
      candidates spread over the whole range of possibilities.

    Reads ``self.n_actions``, ``self._l`` and ``self.random_state``; the
    latter must provide ``uniform``, ``randint`` and ``shuffle``.
    NOTE(review): ``randint(0, n)`` is used as an index, so an exclusive
    upper bound (numpy ``RandomState`` style) is assumed -- confirm.

    Returns:
        A list of ``self._l`` actions (elements of ``range(n_actions)`` in
        the discrete case, tuples with one drawn value per dimension
        otherwise).

    Raises:
        ValueError: if ``self._l`` is negative (raised by ``itertools``).
    """
    if isinstance(self.n_actions, int):
        actions_list = range(self.n_actions)
    else:
        # Number of random values drawn between min and max per dimension.
        samples_per_dim = 3
        possible_actions = [
            [self.random_state.uniform(bounds[0], bounds[1])
             for _ in range(samples_per_dim)]
            for bounds in self.n_actions
        ]
        actions_list = list(itertools.product(*possible_actions))

    seq_len = self._l
    # Created before any index is drawn so that an invalid length fails
    # without consuming random numbers. The iterator is lazy: nothing is
    # enumerated yet.
    sequences = itertools.combinations_with_replacement(actions_list, seq_len)

    # Number of multisets of size seq_len over n candidates is
    # C(n + seq_len - 1, seq_len). Computed incrementally with exact integer
    # arithmetic instead of materializing every combination just to count.
    n_candidates = len(actions_list)
    n_sequences = 1
    for k in range(1, seq_len + 1):
        n_sequences = n_sequences * (n_candidates - 1 + k) // k

    index_pick = self.random_state.randint(0, n_sequences)
    # Walk the lazy iterator up to the drawn index; only one combination is
    # ever held in memory.
    sequence = list(next(itertools.islice(sequences, int(index_pick), None)))
    self.random_state.shuffle(sequence)
    return sequence
评论列表
文章目录