def __init__(self, hidden_size):
    """Create the network's layers and initialise their parameters.

    The module consists of a convolutional body (``conv1`` -> ``conv2`` ->
    ``fc1``), an ``LSTMCell`` and five linear heads: two actor heads and
    two critic heads of width ``ACTION_SIZE``, plus a single-unit
    ``fc_class`` head. How these are wired together is defined by the
    forward pass, which is not part of this method.

    All parameters whose name contains ``'weight'`` are initialised
    orthogonally and all whose name contains ``'bias'`` are zeroed; the
    forget-gate slice of every LSTM bias is then set to 1.

    Args:
        hidden_size: Output width of ``fc1`` and size of the LSTM hidden
            state; also the input width of every head.
    """
    super(ActorCritic, self).__init__()
    # Total number of elements in a single state.
    self.state_size = STATE_SIZE[0] * STATE_SIZE[1] * STATE_SIZE[2]

    # Shared activation modules.
    self.elu = nn.ELU(inplace=True)
    # NOTE(review): Softmax is built without an explicit `dim`, which
    # relies on PyTorch's deprecated implicit-dimension choice. Pass
    # `dim=` once the shape used in forward() is confirmed.
    self.softmax = nn.Softmax()
    self.sigmoid = nn.Sigmoid()

    # Pass state into model body
    self.conv1 = nn.Conv2d(STATE_SIZE[0], 32, 4, stride=2)
    self.conv2 = nn.Conv2d(32, 32, 3)
    # NOTE(review): 1152 is hard-coded and must equal the flattened size
    # of whatever forward() feeds into fc1 -- confirm against STATE_SIZE.
    self.fc1 = nn.Linear(1152, hidden_size)

    # Pass previous action, reward and timestep directly into LSTM
    self.lstm = nn.LSTMCell(hidden_size + ACTION_SIZE + 2, hidden_size)

    # Output heads, all fed from a hidden_size-wide vector.
    self.fc_actor1 = nn.Linear(hidden_size, ACTION_SIZE)
    self.fc_critic1 = nn.Linear(hidden_size, ACTION_SIZE)
    self.fc_actor2 = nn.Linear(hidden_size, ACTION_SIZE)
    self.fc_critic2 = nn.Linear(hidden_size, ACTION_SIZE)
    self.fc_class = nn.Linear(hidden_size, 1)

    # Orthogonal weight initialisation, zero biases. The in-place
    # (trailing underscore) initialisers replace the deprecated
    # init.orthogonal / init.constant aliases.
    for name, param in self.named_parameters():
        if 'weight' in name:
            init.orthogonal_(param)
        elif 'bias' in name:
            init.constant_(param, 0)

    # Set LSTM forget gate bias to 1. Each bias vector stacks four
    # equally sized gate chunks; the second quarter [n/4, n/2) is taken
    # to be the forget gate. This runs for every bias parameter of the
    # cell, so each one receives the offset.
    for name, param in self.lstm.named_parameters():
        if 'bias' in name:
            n = param.size(0)
            forget_start_idx, forget_end_idx = n // 4, n // 2
            init.constant_(param[forget_start_idx:forget_end_idx], 1)
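# Comment list (web-page navigation text, not part of the code)
# Table of contents (web-page navigation text, not part of the code)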