def create_fc_model(self):
    """Build the fully connected network and store its tensors on ``self``.

    Reads ``self.state_dim``, ``self.action_dim`` and ``self.entropy_coef``.

    Sets the following attributes:

    * ``state_in`` -- ``Input`` created from ``self.state_dim``.
    * ``hidden``   -- 256-unit ``Dense`` layer with ELU activation on top
      of ``state_in``.
    * ``value``    -- single-unit ``Dense`` head on ``hidden`` (no
      activation argument is passed).
    * ``policy``   -- ``Dense`` softmax head on ``hidden`` with
      ``self.action_dim`` units.
    * ``q_values`` -- symbolic expression
      ``entropy_coef * (log(policy) - sum(policy * log(policy))) + value``,
      where the sum runs over axis 1 and both the sum and ``value`` are
      tiled ``self.action_dim`` times along axis 1.
    * ``model``    -- ``Model`` mapping ``state_in`` to
      ``[policy, value]``; ``q_values`` is not one of its outputs.

    Returns nothing.
    """
    weight_init = 'glorot_uniform'
    n_actions = self.action_dim
    # Repeat a column tensor n_actions times along axis 1.
    # NOTE(review): assumes axis 0 is the batch axis and axis 1 the action
    # axis -- confirm against the callers.
    per_action = (1, n_actions)

    # Layers are created in the order hidden -> value -> policy.
    self.state_in = Input(self.state_dim)
    self.hidden = Dense(256, init=weight_init, activation='elu')(self.state_in)
    self.value = Dense(1)(self.hidden)
    self.policy = Dense(n_actions, init=weight_init,
                        activation='softmax')(self.hidden)

    # A tiny constant is added before taking the log, presumably so that a
    # zero probability does not produce log(0).
    log_policy = Theano.log(self.policy + 1e-18)
    # Policy-weighted sum of log-probabilities, kept as a column so it can
    # be tiled back to one entry per action.
    weighted_log_sum = Theano.sum(log_policy * self.policy,
                                  axis=[1], keepdims=True)
    centred_log_policy = log_policy - Theano.tile(weighted_log_sum, per_action)

    self.q_values = (self.entropy_coef * centred_log_policy
                     + Theano.tile(self.value, per_action))

    self.model = Model(self.state_in, output=[self.policy, self.value])
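# Comment list (stray web-page navigation text, not part of the code)
# Article table of contents (stray web-page navigation text, not part of the code)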