def __init__(self, use_gpu, enable_controller, dim):
self.use_gpu = use_gpu
self.num_of_actions = len(enable_controller)
self.enable_controller = enable_controller
self.dim = dim
print("Initializing Q-Network...")
hidden_dim = 256
self.model = FunctionSet(
l4=F.Linear(self.dim*self.hist_size, hidden_dim, wscale=np.sqrt(2)),
q_value=F.Linear(hidden_dim, self.num_of_actions,
initialW=np.zeros((self.num_of_actions, hidden_dim),
dtype=np.float32))
)
if self.use_gpu >= 0:
self.model.to_gpu()
self.model_target = copy.deepcopy(self.model)
self.optimizer = optimizers.RMSpropGraves(lr=0.00025, alpha=0.95, momentum=0.95, eps=0.0001)
self.optimizer.setup(self.model.collect_parameters())
# History Data : D=[s, a, r, s_dash, end_episode_flag]
self.d = [np.zeros((self.data_size, self.hist_size, self.dim), dtype=np.uint8),
np.zeros(self.data_size, dtype=np.uint8),
np.zeros((self.data_size, 1), dtype=np.int8),
np.zeros((self.data_size, self.hist_size, self.dim), dtype=np.uint8),
np.zeros((self.data_size, 1), dtype=np.bool)]
评论列表
文章目录