def __init__(self, params, params_task, X, model, policy):
    """Store the configuration and compile the Theano functions.

    Three compiled functions are created, all taking one matrix input:

    * ``self.fwpass``         -- returns the cost from ``self.control``.
    * ``self.train_func``     -- returns ``[cost]`` and applies the updates
      produced by ``self.adam`` to the trainable parameters of ``policy``.
    * ``self.policy_network`` -- returns ``self.predict`` of the input.

    Args:
        params: Mapping of settings; ``params['learning_rate']`` is read
            here and forwarded to ``self.adam``.
        params_task: Stored as ``self.params_task``; not read here.
        X: Not used inside this constructor; kept so existing callers
            keep working.
        model: Stored as ``self.model``; not read here.
        policy: Passed to ``lasagne.layers.get_all_params(...,
            trainable=True)`` to collect the parameters to optimise.
    """
    # No seed is given, so NumPy seeds the generator from OS entropy.
    self.rng = np.random.RandomState()

    # These attributes are assigned before self.control() is called below.
    # NOTE(review): control/predict are defined outside this block and
    # presumably read some of them -- keep this ordering.
    self.model = model
    self.policy = policy
    self.params = params
    self.params_task = params_task

    # Symbolic input shared by every compiled function.
    self.x = T.matrix('x')
    cost = self.control(self.x)

    self.fwpass = theano.function(
        inputs=[self.x],
        outputs=cost,
        allow_input_downcast=True,
    )

    # Evaluation order matches the original one-liner: parameters first,
    # then the learning-rate lookup, then the update rule.
    trainable_params = lasagne.layers.get_all_params(self.policy, trainable=True)
    updates = self.adam(
        cost,
        trainable_params,
        learning_rate=self.params['learning_rate'],
    )

    # allow_input_downcast is set on all three functions so that an array
    # accepted by fwpass is also accepted by train_func / policy_network
    # (previously only fwpass tolerated e.g. float64 data under
    # floatX=float32; the other two raised TypeError).
    # outputs is deliberately a list here, so the call returns [cost].
    self.train_func = theano.function(
        inputs=[self.x],
        outputs=[cost],
        updates=updates,
        allow_input_downcast=True,
    )
    self.policy_network = theano.function(
        inputs=[self.x],
        outputs=self.predict(self.x),
        allow_input_downcast=True,
    )
评论列表
文章目录