def __init__(self, n_history, n_action, on_gpu=False):
    self.n_history = n_history
    self.n_action = n_action
    self.on_gpu = on_gpu
    super(Q, self).__init__(
        l1=F.Convolution2D(n_history, 32, ksize=8, stride=4, nobias=False, wscale=np.sqrt(2)),
        l2=F.Convolution2D(32, 64, ksize=3, stride=2, nobias=False, wscale=np.sqrt(2)),
        l3=F.Convolution2D(64, 64, ksize=3, stride=1, nobias=False, wscale=np.sqrt(2)),
        l4=F.Linear(3136, 512, wscale=np.sqrt(2)),
        out=F.Linear(512, self.n_action, initialW=np.zeros((n_action, 512), dtype=np.float32))
    )
    if on_gpu:
        self.to_gpu()
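The 3136 input size of l4 follows from stacking the three convolutions on 84x84 frames: 20x20 after l1, 9x9 after l2, 7x7 after l3, so 64 * 7 * 7 = 3136. A minimal forward-pass sketch under those assumptions (the input size and the ReLU activations are not shown in the snippet itself):

def forward_sketch(self, state):
    h1 = F.relu(self.l1(state))  # (n_history, 84, 84) -> (32, 20, 20)
    h2 = F.relu(self.l2(h1))     # (32, 20, 20) -> (64, 9, 9)
    h3 = F.relu(self.l3(h2))     # (64, 9, 9)   -> (64, 7, 7)
    h4 = F.relu(self.l4(h3))     # 3136 -> 512 (F.Linear flattens the non-batch axes)
    return self.out(h4)          # 512 -> n_action Q-values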
def __init__(self, insize, outsize):
    self.insize = insize
    self.outsize = outsize
    self.h = None
    self.c = None
    self.peep_dim = insize + 2 * outsize
    super(DyerLSTM, self).__init__(
        linear_in=F.Linear(self.peep_dim, outsize, bias=0.25,
                           wscale=_dyer_init(self.peep_dim, outsize)),
        linear_c=F.Linear(insize + outsize, outsize,
                          wscale=_dyer_init(insize + outsize, outsize)),
        linear_out=F.Linear(self.peep_dim, outsize,
                            wscale=_dyer_init(self.peep_dim, outsize))
    )
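The link shapes (peep_dim = insize + 2 * outsize for the gates, insize + outsize for the candidate) suggest the coupled-gate LSTM of Dyer et al., with the forget gate tied to 1 minus the input gate and peephole connections to the cell. The step function is not part of this snippet; a hedged sketch of how these links would typically be wired:

def step_sketch(self, x):
    batch = x.data.shape[0]
    if self.h is None:
        self.h = chainer.Variable(np.zeros((batch, self.outsize), dtype=np.float32))
        self.c = chainer.Variable(np.zeros((batch, self.outsize), dtype=np.float32))
    i = F.sigmoid(self.linear_in(F.concat((x, self.h, self.c))))   # input gate (peephole on old cell)
    c_tilde = F.tanh(self.linear_c(F.concat((x, self.h))))         # candidate cell state
    self.c = i * c_tilde + (1 - i) * self.c                        # coupled input/forget gates
    o = F.sigmoid(self.linear_out(F.concat((x, self.h, self.c))))  # output gate (peephole on new cell)
    self.h = o * F.tanh(self.c)
    return self.h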
def test_forward_invalid(self):
    f = F.Linear(5, 5)

    # OK
    v = chainer.Variable(numpy.random.randn(1, 5).astype(numpy.float32))
    result = f(v)
    assert isinstance(result, chainer.Variable)

    # Incorrect dtype
    # in py3, numpy dtypes are represented as class
    msg = """\
Invalid operation is performed in: LinearFunction \\(Forward\\)

Expect: in_types\\[0\\]\\.dtype == <(type|class) 'numpy\\.float32'>
Actual: float64 \\!= <(type|class) 'numpy\\.float32'>"""
    v = chainer.Variable(numpy.random.randn(1, 5))
    with six.assertRaisesRegex(self, chainer.utils.type_check.InvalidType,
                               msg):
        f(v)

    # Incorrect dim
    msg = """\
Invalid operation is performed in: LinearFunction \\(Forward\\)

Expect: in_types\\[0\\]\\.ndim >= 2
Actual: 1 < 2"""
    v = chainer.Variable(numpy.random.randn(5).astype(numpy.float32))
    with six.assertRaisesRegex(self, chainer.utils.type_check.InvalidType,
                               msg):
        f(v)
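For contrast, the ndim check passes once the input is given an explicit batch dimension; a small usage note, not part of the original test:

f = F.Linear(5, 5)
v = chainer.Variable(numpy.random.randn(5).astype(numpy.float32).reshape(1, 5))
assert isinstance(f(v), chainer.Variable)  # ndim == 2 and dtype float32: accepted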
def __init__(self, use_gpu, enable_controller, dim):
    self.use_gpu = use_gpu
    self.num_of_actions = len(enable_controller)
    self.enable_controller = enable_controller
    self.dim = dim

    print("Initializing Q-Network...")

    hidden_dim = 256
    self.model = FunctionSet(
        l4=F.Linear(self.dim*self.hist_size, hidden_dim, wscale=np.sqrt(2)),
        q_value=F.Linear(hidden_dim, self.num_of_actions,
                         initialW=np.zeros((self.num_of_actions, hidden_dim),
                                           dtype=np.float32))
    )
    if self.use_gpu >= 0:
        self.model.to_gpu()

    self.model_target = copy.deepcopy(self.model)

    self.optimizer = optimizers.RMSpropGraves(lr=0.00025, alpha=0.95, momentum=0.95, eps=0.0001)
    self.optimizer.setup(self.model.collect_parameters())

    # History Data : D=[s, a, r, s_dash, end_episode_flag]
    self.d = [np.zeros((self.data_size, self.hist_size, self.dim), dtype=np.uint8),
              np.zeros(self.data_size, dtype=np.uint8),
              np.zeros((self.data_size, 1), dtype=np.int8),
              np.zeros((self.data_size, self.hist_size, self.dim), dtype=np.uint8),
              np.zeros((self.data_size, 1), dtype=np.bool)]
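The comment documents the layout of the replay memory self.d; a hedged sketch of how one transition could be written into it (store_transition and its argument names are illustrative, not from the snippet):

def store_transition(self, index, state, action, reward, state_dash, episode_end):
    self.d[0][index] = state        # (hist_size, dim) observation
    self.d[1][index] = action
    self.d[2][index] = reward
    self.d[3][index] = state_dash   # successor observation
    self.d[4][index] = episode_end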
def __init__(self, action, other_action, size, epsilon=0.05, hidden=200):
    self.action = action
    self.other_action = other_action
    self.width = size * size
    self.epsilon = epsilon
    self.hidden = hidden

    super(ChainerAgent, self).__init__(
        l1=F.Linear(self.width, self.hidden, wscale=np.sqrt(2)),
        l2=F.Linear(self.hidden, 1, wscale=np.sqrt(2)),
    )
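Here l2 maps the hidden layer to a single scalar, so the agent scores one board of size * size cells at a time. A hedged forward sketch (the ReLU activation is an assumption; the snippet defines only the two Linear links):

def evaluate_sketch(self, board):
    h = F.relu(self.l1(board))  # width = size * size inputs -> hidden
    return self.l2(h)           # hidden -> one scalar score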
def __init__(self, state_dim=STATE_DIM):
    super(Q, self).__init__(
        l1=F.Linear(state_dim, 50),
        l2=F.Linear(50, 50),
        q_value=F.Linear(50, 5)
    )
def __init__(self, state_dim=STATE_DIM):
    super(Q, self).__init__(
        l1=F.Linear(state_dim, 50),
        l2=F.Linear(50, 50),
        q_value=F.Linear(50, NUM_ACTIONS)
    )
def __init__(self, state_dim=STATE_DIM):
    super(Q, self).__init__(
        l1=F.Linear(state_dim, 50),
        l2=F.Linear(50, 50),
        q_value=F.Linear(50, 5)
    )
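The three small Q-network variants above differ only in the width of the output layer. A forward sketch that would fit any of them (the ReLU activations are an assumption, since the snippets define only the layers):

def q_values_sketch(self, state):
    h1 = F.relu(self.l1(state))
    h2 = F.relu(self.l2(h1))
    return self.q_value(h2)  # one Q-value per action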
def __init__(self, enable_controller=[0, 1, 3, 4]):
    self.num_of_actions = len(enable_controller)
    self.enable_controller = enable_controller  # Default setting : "Breakout"

    print "Initializing DDQN..."
    # Initialization of Chainer 1.1.0 or older.
    # print "CUDA init"
    # cuda.init()

    print "Model Building"
    self.model = FunctionSet(
        l1=F.Convolution2D(4, 32, ksize=8, stride=4, nobias=False, wscale=np.sqrt(2)),
        l2=F.Convolution2D(32, 64, ksize=4, stride=2, nobias=False, wscale=np.sqrt(2)),
        l3=F.Convolution2D(64, 64, ksize=3, stride=1, nobias=False, wscale=np.sqrt(2)),
        l4=F.Linear(3136, 512, wscale=np.sqrt(2)),
        q_value=F.Linear(512, self.num_of_actions,
                         initialW=np.zeros((self.num_of_actions, 512),
                                           dtype=np.float32))
    ).to_gpu()

    if args.resumemodel:
        # load saved model
        serializers.load_npz(args.resumemodel, self.model)
        print "load model from resume.model"

    self.model_target = copy.deepcopy(self.model)

    print "Initializing Optimizer"
    self.optimizer = optimizers.RMSpropGraves(lr=0.00025, alpha=0.95, momentum=0.95, eps=0.0001)
    self.optimizer.setup(self.model.collect_parameters())

    # History Data : D=[s, a, r, s_dash, end_episode_flag]
    if args.resumeD1 and args.resumeD2:
        # load saved D1 and D2
        npz_tmp1 = np.load(args.resumeD1)
        npz_tmp2 = np.load(args.resumeD2)
        self.D = [npz_tmp1['D0'],
                  npz_tmp1['D1'],
                  npz_tmp1['D2'],
                  npz_tmp2['D3'],
                  npz_tmp2['D4']]
        npz_tmp1.close()
        npz_tmp2.close()
        print "loaded stored D1 and D2"
    else:
        self.D = [np.zeros((self.data_size, 4, 84, 84), dtype=np.uint8),
                  np.zeros(self.data_size, dtype=np.uint8),
                  np.zeros((self.data_size, 1), dtype=np.int8),
                  np.zeros((self.data_size, 4, 84, 84), dtype=np.uint8),
                  np.zeros((self.data_size, 1), dtype=np.bool)]
        print "initialize D data"
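The constructor resumes the replay memory from two .npz files holding keys D0-D2 and D3-D4. A hedged sketch of the matching save step (the method and argument names are illustrative; only the key layout is taken from the loading code above):

def save_replay_sketch(self, path_d1, path_d2):
    np.savez(path_d1, D0=self.D[0], D1=self.D[1], D2=self.D[2])
    np.savez(path_d2, D3=self.D[3], D4=self.D[4])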