def Q_func(self, state, train=True):
    """Run ``state`` through the network and return the ``q_value`` output.

    The input is wrapped in a ``Variable`` and pushed through five stages.
    Stage N (N = 1..5) applies ``self.fcN``, then ``self.bnN``, then
    ``F.tanh``. The activation of the last stage is fed to ``self.q_value``.

    Args:
        state: Input handed to ``Variable``.
            NOTE(review): presumably a batch array of states -- confirm
            against the callers.
        train: When true (the default) the ``bn`` layers are called with
            ``test=False``; otherwise they are called with ``test=True``.

    Returns:
        Whatever ``self.q_value`` returns for the final hidden activation.
    """
    # The bn layers take a ``test`` flag, which is the inverse of ``train``.
    # NOTE(review): the ``test=`` keyword looks like the Chainer v1
    # BatchNormalization call style -- confirm the installed version.
    is_test = not train

    hidden = Variable(state)
    for idx in range(1, 6):
        normalize = getattr(self, "bn%d" % idx)
        linear = getattr(self, "fc%d" % idx)
        hidden = F.tanh(normalize(linear(hidden), test=is_test))

    return self.q_value(hidden)
评论列表
文章目录