def control(self, st):
    """Build the symbolic mean of the third per-step output of ``self._step``.

    Each starting state in ``st`` is repeated ``self.params['samples']``
    times, ``self._step`` is iterated ``self.params['T']`` times through
    ``theano.scan``, and the mean of the third scan output is returned.

    Args:
        st: Symbolic starting states. Both ``st.shape[0]`` and
            ``st.shape[1]`` are read, so at least two axes are needed
            (presumably (n_states, state_dim) -- TODO confirm with callers).

    Returns:
        Symbolic scalar: the mean over every entry of the third output
        collected by the scan.
    """
    # A new random stream is created on every call, seeded from self.rng.
    seed = self.rng.randint(999999)
    stream = T.shared_randomstreams.RandomStreams(seed)

    # Several roll-outs are run for each starting state, so the states are
    # repeated along the first axis.
    num_rollouts = self.params['samples']
    tiled_states = T.tile(st, [num_rollouts, 1])

    horizon = self.params['T']

    # Both noise tensors are drawn once up front and passed unchanged to
    # every step; how they are consumed is up to self._step.
    noise_o = stream.normal(size=(tiled_states.shape[0], 1, horizon))
    noise_i_scale = T.sqrt(st.shape[1])
    noise_i = noise_i_scale * stream.normal(
        size=(num_rollouts, st.shape[0], horizon)
    )

    # The first two outputs are recurrent (seeded with the tiled states and
    # a zero); None marks the third output as collected but not fed back.
    initial_outputs = [tiled_states, T.as_tensor_variable(0), None]

    # The update dictionary returned by scan is not used here.
    (_, _, step_values), _ = theano.scan(
        fn=self._step,
        outputs_info=initial_outputs,
        n_steps=horizon,
        non_sequences=[noise_o, noise_i],
    )
    return step_values.mean()
评论列表
文章目录