def __init__(self, obs_size, action_space,
             n_hidden_layers=2, n_hidden_channels=64,
             bound_mean=None, normalize_obs=None):
    """Build the policy and value networks plus an optional obs filter.

    Args:
        obs_size: Observation size. Used as the input size of both the
            policy and the value network, and as the ``shape`` of the
            observation filter when one is created.
        action_space: Object exposing ``low`` and ``high`` arrays.
            ``low.size`` is used as the action size, and both arrays
            are forwarded to the policy as ``min_action`` /
            ``max_action``.
        n_hidden_layers: Number of hidden layers in each network.
        n_hidden_channels: Width of every hidden layer.
        bound_mean: Required flag, forwarded unchanged to the policy's
            ``bound_mean`` option.
        normalize_obs: Required flag. When true, a
            ``links.EmpiricalNormalization`` instance is attached as
            ``self.obs_filter``; otherwise that attribute is not set.

    Raises:
        ValueError: If ``bound_mean`` or ``normalize_obs`` is left at
            its ``None`` default or is otherwise not ``True``/``False``.
    """
    # The ``None`` defaults are sentinels for "not specified": callers
    # must choose both flags explicitly. Validate with a real exception
    # rather than ``assert`` so the check still runs under ``python -O``.
    for arg_name, arg_value in (('bound_mean', bound_mean),
                                ('normalize_obs', normalize_obs)):
        if arg_value not in (False, True):
            raise ValueError(
                '{} must be explicitly set to True or False, '
                'got {!r}'.format(arg_name, arg_value))

    super().__init__()

    # The value network mirrors the policy's hidden layout.
    hidden_sizes = (n_hidden_channels,) * n_hidden_layers
    self.normalize_obs = normalize_obs

    # NOTE(review): attributes assigned inside ``init_scope()`` are
    # presumably registered as child links of this model, per the Chainer
    # convention -- confirm against the base class.
    with self.init_scope():
        self.pi = policies.FCGaussianPolicyWithStateIndependentCovariance(
            obs_size, action_space.low.size,
            n_hidden_layers, n_hidden_channels,
            var_type='diagonal', nonlinearity=F.tanh,
            bound_mean=bound_mean,
            min_action=action_space.low, max_action=action_space.high,
            mean_wscale=1e-2)
        self.v = links.MLP(obs_size, 1, hidden_sizes=hidden_sizes)
        # The filter only exists when normalization was requested, so
        # code reading ``self.obs_filter`` must check ``normalize_obs``.
        if self.normalize_obs:
            self.obs_filter = links.EmpiricalNormalization(
                shape=obs_size
            )
评论列表
文章目录