def make_model(self, env):
n_dim_obs = env.observation_space.low.size
n_dim_action = env.action_space.low.size
n_hidden_channels = 50
policy = policies.FCGaussianPolicy(
n_input_channels=n_dim_obs,
n_hidden_layers=2,
n_hidden_channels=n_hidden_channels,
action_size=n_dim_action,
min_action=env.action_space.low,
max_action=env.action_space.high)
q_func = q_function.FCSAQFunction(
n_dim_obs=n_dim_obs,
n_dim_action=n_dim_action,
n_hidden_layers=2,
n_hidden_channels=n_hidden_channels)
return chainer.Chain(policy=policy, q_function=q_func)
评论列表
文章目录