# pc_model.py source file - Python code snippet

def __init__(self, hidden_size):
    """Build the actor-critic layers and initialise their parameters.

    Creates two convolutional layers, a linear layer, an LSTM cell, two
    actor heads, two critic heads and a single-output ``fc_class`` head.
    Every parameter whose name contains ``weight`` is initialised
    orthogonally and every ``bias`` is set to zero; afterwards the
    forget-gate slice of each LSTM bias is set to 1.

    Args:
        hidden_size: Output width of ``fc1`` and size of the LSTM hidden
            state; also the input width of every head.
    """
    super(ActorCritic, self).__init__()
    # Number of elements in one state (product of the three STATE_SIZE dims)
    self.state_size = STATE_SIZE[0] * STATE_SIZE[1] * STATE_SIZE[2]

    self.elu = nn.ELU(inplace=True)
    # NOTE(review): no explicit `dim` is passed, so the softmax axis is chosen
    # implicitly from the input rank - confirm the intended axis in forward()
    self.softmax = nn.Softmax()
    self.sigmoid = nn.Sigmoid()

    # Pass state into model body
    self.conv1 = nn.Conv2d(STATE_SIZE[0], 32, 4, stride=2)
    self.conv2 = nn.Conv2d(32, 32, 3)
    # NOTE(review): 1152 is presumably the flattened conv output for the
    # configured STATE_SIZE - verify against forward()
    self.fc1 = nn.Linear(1152, hidden_size)
    # Pass previous action, reward and timestep directly into LSTM
    self.lstm = nn.LSTMCell(hidden_size + ACTION_SIZE + 2, hidden_size)
    self.fc_actor1 = nn.Linear(hidden_size, ACTION_SIZE)
    self.fc_critic1 = nn.Linear(hidden_size, ACTION_SIZE)
    self.fc_actor2 = nn.Linear(hidden_size, ACTION_SIZE)
    self.fc_critic2 = nn.Linear(hidden_size, ACTION_SIZE)
    self.fc_class = nn.Linear(hidden_size, 1)

    # The un-suffixed initialisers are deprecated in favour of the in-place
    # `*_` variants; use those when available and fall back to the legacy
    # names so the module still builds on older PyTorch releases.
    orthogonal_init = getattr(init, 'orthogonal_', None) or init.orthogonal
    constant_init = getattr(init, 'constant_', None) or init.constant

    # Orthogonal weight initialisation
    for name, p in self.named_parameters():
      if 'weight' in name:
        orthogonal_init(p)
      elif 'bias' in name:
        constant_init(p, 0)
    # Set LSTM forget gate bias to 1. The slice [n/4, n/2) is the second
    # quarter of each bias vector; every LSTM parameter whose name contains
    # 'bias' is updated.
    for name, p in self.lstm.named_parameters():
      if 'bias' in name:
        n = p.size(0)
        forget_start_idx, forget_end_idx = n // 4, n // 2
        constant_init(p[forget_start_idx:forget_end_idx], 1)