def create_conv_model(self):
    """Build the convolutional policy/value network and its Q-value expression.

    Reads ``self.state_dim``, ``self.action_dim`` and ``self.entropy_coef``.

    Sets the following attributes on ``self``:
        state_in: input placeholder created from ``self.state_dim``.
        l1, l2:   outputs of the two ELU convolution stages.
        l3:       alias of ``l2`` (a third convolution stage is disabled).
        h:        flattened convolution features.
        hidden:   256-unit ELU dense layer on top of ``h``.
        value:    single linear output computed from ``hidden``.
        policy:   softmax over ``self.action_dim`` outputs from ``hidden``.
        q_values: symbolic expression
                  ``entropy_coef * (log(policy) - sum(policy * log(policy))) + value``,
                  with the sum taken over axis 1 and tiled across the actions.
        model:    Keras ``Model`` mapping ``state_in`` to ``[policy, value]``.

    NOTE(review): ``Theano`` is presumably an alias for ``theano.tensor``
    imported elsewhere in the file -- confirm against the import block.
    """
    weight_init = 'glorot_uniform'

    # Convolutional feature extractor.
    self.state_in = Input(self.state_dim)
    first_conv = Convolution2D(32, 8, 8, subsample=(4, 4), border_mode='same',
                               activation='elu', init=weight_init)
    self.l1 = first_conv(self.state_in)
    second_conv = Convolution2D(64, 4, 4, subsample=(2, 2), border_mode='same',
                                activation='elu', init=weight_init)
    self.l2 = second_conv(self.l1)
    # The third convolution stage is switched off; l3 simply forwards l2 so
    # that downstream code keeps using the same attribute name.
    self.l3 = self.l2

    # Shared dense trunk feeding both output heads.
    self.h = Flatten()(self.l3)
    self.hidden = Dense(256, init=weight_init, activation='elu')(self.h)
    self.value = Dense(1, init=weight_init)(self.hidden)
    self.policy = Dense(self.action_dim, init=weight_init, activation='softmax')(self.hidden)

    # Q-value expression built from the two heads. The 1e-18 offset keeps
    # log() finite when a probability is exactly zero.
    per_action = (1, self.action_dim)
    log_policy = Theano.log(self.policy + 1e-18)
    expected_log_policy = Theano.sum(log_policy * self.policy, axis=[1], keepdims=True)
    scaled_log_term = self.entropy_coef * (log_policy - Theano.tile(expected_log_policy, per_action))
    self.q_values = scaled_log_term + Theano.tile(self.value, per_action)

    self.model = Model(self.state_in, output=[self.policy, self.value])
# (Web-page navigation residue, not part of the code: "Comment list" / "Article contents".)