def __init__(self, vocab_size, hidden_size, num_layers, ignore_label=-1):
    """Register the embedding, the stateless LSTM stack and the output layer.

    Args:
        vocab_size: Input size of the embedding and output size of ``hy``.
        hidden_size: Width of the embedding and of every LSTM layer.
        num_layers: Number of ``l{i}`` StatelessLSTM links to create.
        ignore_label: Forwarded to ``L.EmbedID``.
    """
    self.vocab_size = vocab_size
    self.hidden_size = hidden_size
    self.num_layers = num_layers
    self.ignore_label = ignore_label

    links = {
        'embed': L.EmbedID(vocab_size, hidden_size,
                           ignore_label=ignore_label),
        'hy': L.Linear(hidden_size, vocab_size),
    }
    for layer in range(self.num_layers):
        links['l{}'.format(layer)] = L.StatelessLSTM(hidden_size, hidden_size)
        # Matching h{i} / c{i} attributes start out as None.
        setattr(self, 'h{}'.format(layer), None)
        setattr(self, 'c{}'.format(layer), None)
    super(RNNLM, self).__init__(**links)

    # Overwrite every registered parameter in place with U(-0.1, 0.1) samples.
    for param in self.params():
        param.data[...] = np.random.uniform(-0.1, 0.1, param.data.shape)
    self.reset_state()
# Example source code for the Python class Linear()
def __init__(self, args):
    """Register the recurrent and candidate links and cache the settings.

    Args:
        args: Configuration object; ``n_in_units``, ``n_units``, ``d_ratio``
            and ``margin`` are read from it.
    """
    n_in, n_out = args.n_in_units, args.n_units
    super(LSTM, self).__init__(
        # Recurrent layer.
        LSTM=L.LSTM(n_in, n_out),
        # A W_predict link (n_units -> n_units) was sketched here earlier
        # but is not registered.
        W_candidate=L.Linear(n_in, n_out),
    )
    # The activation is the identity; F.tanh was the earlier alternative.
    self.act1 = F.identity
    self.args = args
    self.n_in_units = n_in
    self.n_units = n_out
    self.dropout_ratio = args.d_ratio
    self.margin = args.margin
    self.initialize_parameters()
def __init__(self, n_class=1000):
    """Register thirteen convolution links and three linear links.

    Args:
        n_class: Output size of the final linear layer ``fc8``.
    """
    super(VGG16, self).__init__()
    # (attribute name, input channels, output channels), in registration order.
    conv_specs = (
        ('conv1_1', 3, 64),
        ('conv1_2', 64, 64),
        ('conv2_1', 64, 128),
        ('conv2_2', 128, 128),
        ('conv3_1', 128, 256),
        ('conv3_2', 256, 256),
        ('conv3_3', 256, 256),
        ('conv4_1', 256, 512),
        ('conv4_2', 512, 512),
        ('conv4_3', 512, 512),
        ('conv5_1', 512, 512),
        ('conv5_2', 512, 512),
        ('conv5_3', 512, 512),
    )
    with self.init_scope():
        for attr_name, c_in, c_out in conv_specs:
            # Every convolution shares the trailing positional args (3, 1, 1).
            setattr(self, attr_name, L.Convolution2D(c_in, c_out, 3, 1, 1))
        self.fc6 = L.Linear(25088, 4096)
        self.fc7 = L.Linear(4096, 4096)
        self.fc8 = L.Linear(4096, n_class)
def __init__(self, in_size, out_size, hidden_sizes, nonlinearity=F.relu,
             last_wscale=1):
    """Build an optional stack of hidden Linear links plus an output link.

    Args:
        in_size: Input width of the first layer.
        out_size: Output width of the ``output`` link.
        hidden_sizes: Sequence of hidden widths; when falsy, no
            ``hidden_layers`` link is registered.
        nonlinearity: Stored on the instance; not used in this constructor.
        last_wscale: Passed to ``LeCunNormal`` for the output layer's weights.
    """
    self.in_size = in_size
    self.out_size = out_size
    self.hidden_sizes = hidden_sizes
    self.nonlinearity = nonlinearity

    super().__init__()
    with self.init_scope():
        # Width feeding the output layer; replaced when hidden layers exist.
        output_in = in_size
        if hidden_sizes:
            widths = [in_size]
            widths.extend(hidden_sizes)
            # Consecutive width pairs give (in, out) for each hidden layer.
            stack = [L.Linear(w_in, w_out)
                     for w_in, w_out in zip(widths, widths[1:])]
            self.hidden_layers = chainer.ChainList(*stack)
            output_in = hidden_sizes[-1]
        self.output = L.Linear(output_in, out_size,
                               initialW=LeCunNormal(last_wscale))
def __init__(self, n_input_channels, n_dim_action, n_hidden_channels,
             n_hidden_layers, action_space, scale_mu=True,
             normalize_input=True):
    """Register the shared hidden network and the v / mu / matrix heads.

    Args:
        n_input_channels: Input width of the hidden network.
        n_dim_action: Output width of ``mu`` and ``mat_diag``.
        n_hidden_channels: Width of the hidden network and input of each head.
        n_hidden_layers: Must be >= 1; the hidden network receives
            ``n_hidden_layers - 1`` hidden sizes.
        action_space: Must not be None; stored on the instance.
        scale_mu: Stored on the instance.
        normalize_input: Forwarded to ``MLPBN``.
    """
    self.n_input_channels = n_input_channels
    self.n_hidden_layers = n_hidden_layers
    self.n_hidden_channels = n_hidden_channels

    assert action_space is not None
    self.scale_mu = scale_mu
    self.action_space = action_space

    super().__init__()
    with self.init_scope():
        assert n_hidden_layers >= 1
        inner_sizes = [n_hidden_channels] * (n_hidden_layers - 1)
        self.hidden_layers = MLPBN(
            in_size=n_input_channels,
            out_size=n_hidden_channels,
            hidden_sizes=inner_sizes,
            normalize_input=normalize_input,
        )
        self.v = L.Linear(n_hidden_channels, 1)
        self.mu = L.Linear(n_hidden_channels, n_dim_action)
        self.mat_diag = L.Linear(n_hidden_channels, n_dim_action)
        # n * (n - 1) / 2 entries; the head is skipped when that count is zero.
        n_off_diag = n_dim_action * (n_dim_action - 1) // 2
        if n_off_diag > 0:
            self.mat_non_diag = L.Linear(n_hidden_channels, n_off_diag)
def init_like_torch(link):
    """Re-initialize Linear and Convolution2D parameters from their fan-in.

    Walks ``link.links()``. For each ``L.Linear`` or ``L.Convolution2D`` it
    overwrites ``W`` (and ``b`` when it is not None) in place with samples
    from U(-stdv, stdv), where ``stdv = 1 / sqrt(fan_in)``. Other link types
    are left untouched.
    """
    # Mimic torch's default parameter initialization
    # TODO(muupan): Use chainer's initializers when it is merged

    def _refill(layer, bound):
        # Weights are drawn before the bias, as in the per-branch code.
        layer.W.data[:] = np.random.uniform(-bound, bound,
                                            size=layer.W.data.shape)
        if layer.b is not None:
            layer.b.data[:] = np.random.uniform(-bound, bound,
                                                size=layer.b.data.shape)

    for child in link.links():
        if isinstance(child, L.Linear):
            _, fan_in = child.W.data.shape
            _refill(child, 1 / np.sqrt(fan_in))
        elif isinstance(child, L.Convolution2D):
            _, in_ch, k_h, k_w = child.W.data.shape
            _refill(child, 1 / np.sqrt(in_ch * k_h * k_w))
def test_copy_param(self):
    """After copy_param(a, b), link ``a`` must reproduce ``b``'s output."""
    dst = L.Linear(1, 5)
    src = L.Linear(1, 5)
    x = chainer.Variable(np.random.rand(1, 1).astype(np.float32))

    def flat_output(link):
        # Forward the shared input and flatten the result to a plain list.
        return list(link(x).data.ravel())

    dst_before = flat_output(dst)
    src_before = flat_output(src)
    self.assertNotEqual(dst_before, src_before)

    # Copy src's parameters into dst.
    copy_param.copy_param(dst, src)

    dst_after = flat_output(dst)
    src_after = flat_output(src)
    self.assertEqual(dst_after, src_before)
    self.assertEqual(src_after, src_before)
def test_soft_copy_param(self):
    """Two soft updates with tau=0.1 move the target 0.5 -> 0.55 -> 0.595."""
    target = L.Linear(1, 5)
    source = L.Linear(1, 5)
    target.W.data[:] = 0.5
    source.W.data[:] = 1

    # target = (1 - tau) * target + tau * source; the source must not change.
    for expected in (0.55, 0.595):
        copy_param.soft_copy_param(
            target_link=target, source_link=source, tau=0.1)
        np.testing.assert_almost_equal(
            target.W.data, np.full(target.W.data.shape, expected))
        np.testing.assert_almost_equal(
            source.W.data, np.full(source.W.data.shape, 1.0))
def make_model(self, env):
    """Build a Chain holding a recurrent Gaussian policy and a Q-function.

    Args:
        env: Environment whose ``observation_space.low`` and
            ``action_space.low`` / ``.high`` supply the sizes and bounds.

    Returns:
        A ``chainer.Chain`` with children ``policy`` and ``q_function``.
    """
    obs_dim = env.observation_space.low.size
    act_dim = env.action_space.low.size
    width = 50

    # Stages are created in the same order as they are applied.
    policy_stages = [
        L.Linear(obs_dim, width),
        F.relu,
        L.Linear(width, width),
        F.relu,
        L.LSTM(width, width),
        policies.FCGaussianPolicy(
            n_input_channels=width,
            action_size=act_dim,
            min_action=env.action_space.low,
            max_action=env.action_space.high),
    ]
    policy = Sequence(*policy_stages)

    critic = q_function.FCLSTMSAQFunction(
        n_dim_obs=obs_dim,
        n_dim_action=act_dim,
        n_hidden_layers=2,
        n_hidden_channels=width)
    return chainer.Chain(policy=policy, q_function=critic)
def __init__(self, args):
    """Register the embedding, encoder, latent and decoder links.

    Args:
        args: Passed to ``self.setArgs``. The attributes read afterwards
            (``n_vocab``, ``n_embed``, ``n_layers``, ``out_size``,
            ``drop_ratio``, ``n_latent``) are presumably set there; verify
            against ``setArgs``.
    """
    self.setArgs(args)
    links = dict(
        embed=L.EmbedID(self.n_vocab, self.n_embed),
        # VAE encoder: two LSTMs (enc_f / enc_b) and the latent heads.
        enc_f=LSTM(self.n_layers, self.n_embed, self.out_size,
                   dropout=self.drop_ratio),
        enc_b=LSTM(self.n_layers, self.n_embed, self.out_size,
                   dropout=self.drop_ratio),
        le2_mu=L.Linear(4 * self.out_size, self.n_latent),
        le2_ln_var=L.Linear(4 * self.out_size, self.n_latent),
        # VAE decoder: latent -> ld_h / ld_c, decoder LSTM, vocabulary head.
        ld_h=L.Linear(self.n_latent, 2 * self.out_size),
        ld_c=L.Linear(self.n_latent, 2 * self.out_size),
        dec=LSTM(self.n_layers, self.n_embed, 2 * self.out_size,
                 dropout=self.drop_ratio),
        h2w=L.Linear(2 * self.out_size, self.n_vocab),
    )
    super(VAE, self).__init__(**links)
def __init__(self, args):
    """Register category embeddings plus the VAE encoder and decoder links.

    Args:
        args: Passed to ``self.setArgs``. The attributes read afterwards
            (``categ_size``, ``n_vocab``, ``n_embed``, ``n_layers``,
            ``out_size``, ``drop_ratio``, ``n_latent``) are presumably set
            there; verify against ``setArgs``.
    """
    self.setArgs(args)
    links = dict(
        # Category embeddings: encoder-sized ones, then decoder-sized ones.
        categ_enc_b_h=L.EmbedID(self.categ_size, self.out_size),
        categ_enc_b_c=L.EmbedID(self.categ_size, self.out_size),
        categ_enc_f_h=L.EmbedID(self.categ_size, self.out_size),
        categ_enc_f_c=L.EmbedID(self.categ_size, self.out_size),
        categ_dec_h=L.EmbedID(self.categ_size, 2 * self.out_size),
        categ_dec_c=L.EmbedID(self.categ_size, 2 * self.out_size),
        embed=L.EmbedID(self.n_vocab, self.n_embed),
        # VAE encoder.
        enc_f=LSTM(self.n_layers, self.n_embed, self.out_size,
                   dropout=self.drop_ratio),
        enc_b=LSTM(self.n_layers, self.n_embed, self.out_size,
                   dropout=self.drop_ratio),
        le2_mu=L.Linear(4 * self.out_size, self.n_latent),
        le2_ln_var=L.Linear(4 * self.out_size, self.n_latent),
        # VAE decoder.
        ld_h=L.Linear(self.n_latent, 2 * self.out_size),
        ld_c=L.Linear(self.n_latent, 2 * self.out_size),
        dec=LSTM(self.n_layers, self.n_embed, 2 * self.out_size,
                 dropout=self.drop_ratio),
        h2w=L.Linear(2 * self.out_size, self.n_vocab),
    )
    super(CVAEHidden, self).__init__(**links)
def __init__(self, category_num):
    """Create the named stages and register each one as a child link.

    Args:
        category_num: Output size of the final ``linear`` layer; also
            embedded in ``self.name``.
    """
    super(Googlenet, self).__init__()
    # (name, link) pairs, built in the order they are registered.
    modules = [
        ('conv1', Conv_BN_ReLU(3, 32, 3, 2, 1)),
        ('conv2', Conv_BN_ReLU(32, 32, 3, 1, 0)),
        ('conv3', Conv_BN_ReLU(32, 64, 3, 1, 1)),
        ('conv4', Conv_BN_ReLU(64, 64, 3, 1, 0)),
        ('conv5', Conv_BN_ReLU(64, 80, 3, 2, 1)),
        ('conv6', Conv_BN_ReLU(80, 192, 3, 1, 0)),
        ('inception_f5_1', Inception_A(192, (64, 96, 96), (48, 64), 32, 64, 'ave', 1)),
        ('inception_f5_2', Inception_A(256, (64, 96, 96), (48, 64), 64, 64, 'ave', 1)),
        ('inception_f5_3', Inception_A(288, (64, 96, 96), (288, 384), 0, 0, 'max', 2)),
        ('inception_f6_1', Inception_B(768, (128, 128, 128, 128, 192), (128, 128, 192), 192, 192, 'ave', 1, 7)),
        ('inception_f6_2', Inception_B(768, (160, 160, 160, 160, 192), (160, 160, 192), 192, 192, 'ave', 1, 7)),
        ('inception_f6_3', Inception_B(768, (160, 160, 160, 160, 192), (160, 160, 192), 192, 192, 'ave', 1, 7)),
        ('inception_f6_4', Inception_B(768, (192, 192, 192, 192, 192), (192, 192, 192), 192, 192, 'ave', 1, 7)),
        ('inception_f6_5', Inception_B(768, (192, 192, 192, 192), (192, 320), 0, 0, 'max', 2, 7)),
        ('inception_f7_1', Inception_C(1280, (448, 384, 384, 384), (384, 384, 384), 192, 320, 'ave', 3)),
        ('inception_f7_2', Inception_C(2048, (448, 384, 384, 384), (384, 384, 384), 192, 320, 'ave', 3)),
        ('linear', L.Linear(2048, category_num)),
    ]
    # Register layers.
    for link_name, link in modules:
        self.add_link(link_name, link)
    self.modules = modules
    self.name = 'googlenet_v3_{}'.format(category_num)
def __init__(
        self, gpu=-1, trunk=VGG16, rpn_in_ch=512, rpn_out_ch=512,
        n_anchors=9, feat_stride=16, anchor_scales='8,16,32',
        num_classes=21, spatial_scale=0.0625, rpn_sigma=1.0, sigma=3.0):
    """Register the trunk, the RPN and the classification / regression heads.

    Args:
        gpu: Stored on the instance.
        trunk: Callable invoked with no arguments to create the ``trunk`` link.
        rpn_in_ch, rpn_out_ch, n_anchors, feat_stride, rpn_sigma: Forwarded
            to ``RPN``.
        anchor_scales: Comma-separated integers; parsed to a list of ints
            before being forwarded to ``RPN``.
        num_classes: Output size of ``cls_score`` (times 4 for ``bbox_pred``);
            also forwarded to ``RPN`` and ``ProposalTargetLayer``.
        spatial_scale: Stored on the instance.
        sigma: Stored on the instance.
    """
    super(FasterRCNN, self).__init__()
    scales = list(map(int, anchor_scales.strip().split(',')))

    self.add_link('trunk', trunk())
    rpn = RPN(rpn_in_ch, rpn_out_ch, n_anchors, feat_stride,
              scales, num_classes, rpn_sigma)
    self.add_link('RPN', rpn)
    self.add_link('fc6', L.Linear(25088, 4096))
    self.add_link('fc7', L.Linear(4096, 4096))
    self.add_link('cls_score', L.Linear(4096, num_classes))
    self.add_link('bbox_pred', L.Linear(4096, num_classes * 4))

    self.train = True
    self.gpu = gpu
    self.sigma = sigma
    self.spatial_scale = spatial_scale
    self.proposal_target_layer = ProposalTargetLayer(num_classes)
def __init__(self, n_layers, n_source_vocab, n_target_vocab, n_source_char, n_units, n_sentences):
    """Register the embeddings, four GRU encoders, the decoder and output layer.

    Args:
        n_layers: Layer count for every GRU.
        n_source_vocab: Input size of ``embed_xw``.
        n_target_vocab: Input size of ``embed_y`` and output size of ``W``.
        n_source_char: Input size of ``embed_xc``.
        n_units: Base width; the decoder side uses ``n_units * 2``.
        n_sentences: Stored; ``n_sen`` is the digit count of its ``str()``.
    """
    wide = n_units * 2

    def encoder_gru():
        # All four encoders share the same constructor arguments.
        return L.NStepGRU(n_layers, n_units, n_units, 0.1)

    super(Seq2seq, self).__init__(
        embed_xw=L.EmbedID(n_source_vocab, n_units),
        embed_xc=L.EmbedID(n_source_char, n_units),
        embed_y=L.EmbedID(n_target_vocab, wide),
        encoder_fw=encoder_gru(),
        encoder_bw=encoder_gru(),
        encoder_fc=encoder_gru(),
        encoder_bc=encoder_gru(),
        decoder=My.NStepGRU(n_layers, wide, wide, 0.1),
        W=L.Linear(wide, n_target_vocab),
    )
    self.n_layers = n_layers
    self.n_units = n_units
    self.n_params = 6
    self.n_sentences = n_sentences
    self.n_process = 0
    self.n_sen = len(str(n_sentences))
def __init__(self, n_layers, n_source_vocab, n_target_vocab, n_source_char, n_target_char, n_units, n_sentences):
    """Register word and character embeddings, encoders, decoders and heads.

    Args:
        n_layers: Layer count for every GRU.
        n_source_vocab: Input size of ``embed_x``.
        n_target_vocab: Input size of ``embed_y`` and output size of ``W``.
        n_source_char: Input size of ``embed_xc``.
        n_target_char: Input size of ``embed_yc`` and output size of ``W_char``.
        n_units: Base width; the word decoder side uses ``n_units * 2``.
        n_sentences: Stored; ``n_sen`` is the digit count of its ``str()``.
    """
    wide = n_units * 2

    def narrow_gru(factory):
        # Same (n_layers, n_units, n_units, 0.1) arguments for either class.
        return factory(n_layers, n_units, n_units, 0.1)

    super(Seq2seq, self).__init__(
        embed_x=L.EmbedID(n_source_vocab, n_units),
        embed_y=L.EmbedID(n_target_vocab, wide),
        embed_xc=L.EmbedID(n_source_char, n_units),
        embed_yc=L.EmbedID(n_target_char, n_units),
        encoder_f=narrow_gru(L.NStepGRU),
        encoder_b=narrow_gru(L.NStepGRU),
        char_encoder=narrow_gru(L.NStepGRU),
        decoder=My.NStepGRU(n_layers, wide, wide, 0.1),
        char_decoder=narrow_gru(L.NStepGRU),
        char_att_decoder=narrow_gru(My.NStepGRU),
        W=L.Linear(wide, n_target_vocab),
        W_hat=L.Linear(n_units * 4, n_units),
        W_char=L.Linear(n_units, n_target_char),
    )
    self.n_layers = n_layers
    self.n_units = n_units
    self.n_params = 7
    self.n_sentences = n_sentences
    self.n_process = 0
    self.n_sen = len(str(n_sentences))
def __init__(self, n_layers, n_source_vocab, n_target_vocab, n_source_char, n_target_char, n_units):
    """Register word and character embeddings, encoders, decoders and heads.

    Args:
        n_layers: Layer count for every GRU.
        n_source_vocab: Input size of ``embed_x``.
        n_target_vocab: Input size of ``embed_y`` and output size of ``W``.
        n_source_char: Input size of ``embed_xc``.
        n_target_char: Input size of ``embed_yc`` and output size of ``W_char``.
        n_units: Base width; the word decoder side uses ``n_units * 2``.
    """
    wide = n_units * 2

    def narrow_gru(factory):
        # Same (n_layers, n_units, n_units, 0.1) arguments for either class.
        return factory(n_layers, n_units, n_units, 0.1)

    super(Seq2seq, self).__init__(
        embed_x=L.EmbedID(n_source_vocab, n_units),
        embed_y=L.EmbedID(n_target_vocab, wide),
        embed_xc=L.EmbedID(n_source_char, n_units),
        embed_yc=L.EmbedID(n_target_char, n_units),
        encoder_f=narrow_gru(L.NStepGRU),
        encoder_b=narrow_gru(L.NStepGRU),
        char_encoder=narrow_gru(L.NStepGRU),
        decoder=My.NStepGRU(n_layers, wide, wide, 0.1),
        char_decoder=narrow_gru(L.NStepGRU),
        char_att_decoder=narrow_gru(My.NStepGRU),
        W=L.Linear(wide, n_target_vocab),
        W_hat=L.Linear(n_units * 4, n_units),
        W_char=L.Linear(n_units, n_target_char),
    )
    self.n_layers = n_layers
    self.n_units = n_units
    self.n_params = 6
def __init__(self, obs_size, n_actions, n_hidden_channels=(1024, 256)):
    """Build a Linear / BatchNormalization / relu stack ending in ``output``.

    Args:
        obs_size: Input width of the first linear layer.
        n_actions: Output width of the final ``output`` layer.
        n_hidden_channels: Iterable of hidden widths. The default is an
            immutable tuple so it cannot be shared and mutated across
            instances; it is only iterated here.

    The ordered ``(name, callable)`` pairs are kept in ``self.forward``.
    Entries whose name starts with ``'_'`` (the activations) are not
    registered as child links.
    """
    super(QFunction, self).__init__()
    net = []
    inpdim = obs_size
    for i, n_hid in enumerate(n_hidden_channels):
        net.append(('l{}'.format(i), L.Linear(inpdim, n_hid)))
        net.append(('norm{}'.format(i), L.BatchNormalization(n_hid)))
        net.append(('_act{}'.format(i), F.relu))
        inpdim = n_hid
    net.append(('output', L.Linear(inpdim, n_actions)))

    with self.init_scope():
        for name, layer in net:
            # Plain functions carry a leading underscore and are skipped.
            if not name.startswith('_'):
                setattr(self, name, layer)

    self.forward = net
def __init__(self, n_hidden, bottom_width=4, ch=512, wscale=0.02):
    """Register one linear, four deconvolution and four batch-norm links.

    Args:
        n_hidden: Input width of ``l0``; stored on the instance.
        bottom_width: ``l0`` outputs ``bottom_width * bottom_width * ch``
            values; stored on the instance.
        ch: Base channel count, reduced via ``ch // 2``, ``ch // 4`` and
            ``ch // 8`` along the deconvolutions; stored on the instance.
        wscale: Argument of the ``Normal`` initializer shared by all weights.
    """
    super(Generator, self).__init__()
    self.n_hidden = n_hidden
    self.ch = ch
    self.bottom_width = bottom_width

    with self.init_scope():
        w = chainer.initializers.Normal(wscale)
        flat_size = bottom_width * bottom_width * ch
        half, quarter, eighth = ch // 2, ch // 4, ch // 8

        self.l0 = L.Linear(self.n_hidden, flat_size, initialW=w)
        self.dc1 = L.Deconvolution2D(ch, half, 4, 2, 1, initialW=w)
        self.dc2 = L.Deconvolution2D(half, quarter, 4, 2, 1, initialW=w)
        self.dc3 = L.Deconvolution2D(quarter, eighth, 4, 2, 1, initialW=w)
        self.dc4 = L.Deconvolution2D(eighth, 3, 3, 1, 1, initialW=w)
        self.bn0 = L.BatchNormalization(flat_size)
        self.bn1 = L.BatchNormalization(half)
        self.bn2 = L.BatchNormalization(quarter)
        self.bn3 = L.BatchNormalization(eighth)
def __init__(self, bottom_width=4, ch=512, wscale=0.02):
    """Register seven convolutions, a linear head and six batch-norm links.

    Args:
        bottom_width: ``l4`` takes ``bottom_width * bottom_width * ch`` inputs.
        ch: Base channel count; layers use ``ch // 8`` up to ``ch // 1``.
        wscale: Argument of the ``Normal`` initializer shared by all weights.
    """
    w = chainer.initializers.Normal(wscale)
    super(Discriminator, self).__init__()

    def conv(c_in, c_out, ksize, stride):
        # Every convolution passes 1 as the fifth positional argument.
        return L.Convolution2D(c_in, c_out, ksize, stride, 1, initialW=w)

    def norm(size):
        return L.BatchNormalization(size, use_gamma=False)

    with self.init_scope():
        self.c0_0 = conv(3, ch // 8, 3, 1)
        self.c0_1 = conv(ch // 8, ch // 4, 4, 2)
        self.c1_0 = conv(ch // 4, ch // 4, 3, 1)
        self.c1_1 = conv(ch // 4, ch // 2, 4, 2)
        self.c2_0 = conv(ch // 2, ch // 2, 3, 1)
        self.c2_1 = conv(ch // 2, ch // 1, 4, 2)
        self.c3_0 = conv(ch // 1, ch // 1, 3, 1)
        self.l4 = L.Linear(bottom_width * bottom_width * ch, 1, initialW=w)
        self.bn0_1 = norm(ch // 4)
        self.bn1_0 = norm(ch // 4)
        self.bn1_1 = norm(ch // 2)
        self.bn2_0 = norm(ch // 2)
        self.bn2_1 = norm(ch // 1)
        self.bn3_0 = norm(ch // 1)
def init_like_torch(link):
    """Re-initialize Linear and Convolution2D parameters from their fan-in.

    Walks ``link.links()``. For each ``L.Linear`` or ``L.Convolution2D`` it
    overwrites ``W`` (and ``b`` when it is not None) in place with samples
    from U(-stdv, stdv), where ``stdv = 1 / sqrt(fan_in)``. Other link types
    are left untouched.
    """
    # Mimic torch's default parameter initialization
    # TODO(muupan): Use chainer's initializers when it is merged

    def _refill(layer, bound):
        # Weights are drawn before the bias, as in the per-branch code.
        layer.W.data[:] = np.random.uniform(-bound, bound,
                                            size=layer.W.data.shape)
        if layer.b is not None:
            layer.b.data[:] = np.random.uniform(-bound, bound,
                                                size=layer.b.data.shape)

    for child in link.links():
        if isinstance(child, L.Linear):
            _, fan_in = child.W.data.shape
            _refill(child, 1 / np.sqrt(fan_in))
        elif isinstance(child, L.Convolution2D):
            _, in_ch, k_h, k_w = child.W.data.shape
            _refill(child, 1 / np.sqrt(in_ch * k_h * k_w))
def __init__(self, n_input_channels, n_actions,
             n_hidden_layers=0, n_hidden_channels=None):
    """Create the chain of Linear layers mapping inputs to ``n_actions``.

    Args:
        n_input_channels: Input width of the first layer.
        n_actions: Output width of the last layer.
        n_hidden_layers: Number of hidden layers; 0 gives a single Linear.
        n_hidden_channels: Width of every hidden layer; only used when
            ``n_hidden_layers > 0``.
    """
    self.n_input_channels = n_input_channels
    self.n_actions = n_actions
    self.n_hidden_layers = n_hidden_layers
    self.n_hidden_channels = n_hidden_channels

    # Layer widths from input to output; consecutive pairs define one Linear.
    if n_hidden_layers > 0:
        widths = ([n_input_channels]
                  + [n_hidden_channels] * n_hidden_layers
                  + [n_actions])
    else:
        widths = [n_input_channels, n_actions]
    layers = [L.Linear(w_in, w_out)
              for w_in, w_out in zip(widths, widths[1:])]

    super(FCSoftmaxPolicy, self).__init__(*layers)
def __init__(self, n_input_channels, n_hidden_layers=0,
             n_hidden_channels=None):
    """Create the chain of Linear layers mapping inputs to a single output.

    Args:
        n_input_channels: Input width of the first layer.
        n_hidden_layers: Number of hidden layers; 0 gives a single Linear.
        n_hidden_channels: Width of every hidden layer; only used when
            ``n_hidden_layers > 0``.
    """
    self.n_input_channels = n_input_channels
    self.n_hidden_layers = n_hidden_layers
    self.n_hidden_channels = n_hidden_channels

    # Layer widths from input to the single output unit.
    if n_hidden_layers > 0:
        widths = ([n_input_channels]
                  + [n_hidden_channels] * n_hidden_layers
                  + [1])
    else:
        widths = [n_input_channels, 1]
    layers = [L.Linear(w_in, w_out)
              for w_in, w_out in zip(widths, widths[1:])]

    super(FCVFunction, self).__init__(*layers)
def __init__(self, in_size, out_size, kernel_size=2, attention=False,
             decoder=False):
    """Register the links for the chosen kernel size, plus attention links.

    Args:
        in_size: Input width; stored as ``self.in_size``.
        out_size: Output width; stored as ``self.size``. The main links
            output ``3 * out_size`` values.
        kernel_size: 1 registers ``W``; 2 registers ``W`` (no bias) and
            ``V``; any other value registers a 1-D convolution ``conv``.
        attention: When truthy, also registers the ``U`` and ``o`` links.
        decoder: Accepted but not used in this constructor.
    """
    gate_width = 3 * out_size
    if kernel_size == 1:
        links = dict(W=Linear(in_size, gate_width))
    elif kernel_size == 2:
        links = dict(W=Linear(in_size, gate_width, nobias=True),
                     V=Linear(in_size, gate_width))
    else:
        links = dict(
            conv=L.ConvolutionND(1, in_size, gate_width, kernel_size,
                                 stride=1, pad=kernel_size - 1))
    super(QRNNLayer, self).__init__(**links)

    if attention:
        self.add_link('U', Linear(out_size, 3 * in_size))
        self.add_link('o', Linear(2 * out_size, out_size))

    self.in_size = in_size
    self.size = out_size
    self.attention = attention
    self.kernel_size = kernel_size
def __init__(self, n_actions):
    """Register three convolutions and two linear layers.

    Args:
        n_actions: Output width of the last linear layer ``fc2``.
    """
    he_init = chainer.initializers.HeNormal()
    c1, c2, c3 = 32, 64, 64
    fc_unit = 256
    # All input sizes are given explicitly; fc1 expects 3136 inputs.
    # Only the linear layers receive the HeNormal initializer.
    links = dict(
        conv1=L.Convolution2D(4, c1, 8, stride=4, pad=0),
        conv2=L.Convolution2D(c1, c2, 4, stride=2, pad=0),
        conv3=L.Convolution2D(c2, c3, 3, stride=1, pad=0),
        fc1=L.Linear(3136, fc_unit, initialW=he_init),
        fc2=L.Linear(fc_unit, n_actions, initialW=he_init),
    )
    super(QFunction, self).__init__(**links)
def __init__(self,
             src_vcb_num,
             trg_vcb_num,
             dim_emb,
             dim_hid):
    """Register the source embedding, the encoder and the decoder links.

    Args:
        src_vcb_num: Input size of the source embedding.
        trg_vcb_num: Input size of the target embedding and output size
            of ``ho``.
        dim_emb: Embedding width.
        dim_hid: Hidden width; ``eh`` and ``hh`` output ``dim_hid * 4``.
    """
    lstm_init_bias = get_lstm_init_bias(dim_hid)
    gate_width = dim_hid * 4
    super().__init__(
        src_emb=L.EmbedID(src_vcb_num, dim_emb, ignore_label=-1),
        encoder=BiLstmEncoder(dim_emb, dim_hid),
        # Decoder links (TODO: make Decoder class).
        trg_emb=L.EmbedID(trg_vcb_num, dim_emb, ignore_label=-1),
        eh=L.Linear(dim_emb, gate_width, initial_bias=lstm_init_bias),
        hh=L.Linear(dim_hid, gate_width, nobias=True),
        ho=L.Linear(dim_hid, trg_vcb_num),
    )
    self.dim_hid = dim_hid
    self.dim_emb = dim_emb
    self.src_vcb_num = src_vcb_num
    self.trg_vcb_num = trg_vcb_num
def __init__(self,
             src_vcb_num,
             trg_vcb_num,
             dim_emb,
             dim_hid,
             attention_type='dot'):
    """Extend the parent model with ``w_c`` and the attention links.

    Args:
        src_vcb_num, trg_vcb_num, dim_emb, dim_hid: Forwarded positionally
            to the parent constructor.
        attention_type: Passed to ``get_attention_components``; stored on
            the instance.
    """
    super().__init__(src_vcb_num, trg_vcb_num, dim_emb, dim_hid)
    self.add_link('w_c', L.Linear(2 * dim_hid, dim_hid))
    # Each (name, link) pair the helper returns becomes a child link.
    components = get_attention_components(attention_type, dim_hid)
    for link_name, link in components.items():
        self.add_link(link_name, link)
    self.attention_type = attention_type
def __init__(self, embeddings, n_labels, dropout=0.5, train=True):
    """Register the embedding, two LSTMs and the output layer.

    Args:
        embeddings: Two-dimensional array; its shape gives
            ``(vocab_size, embed_size)`` and it is the initial embedding
            weight.
        n_labels: Output size of ``linear``.
        dropout: Passed to both LSTMs; stored as ``_dropout``.
        train: Stored as ``self.train``.
    """
    vocab_size, embed_size = embeddings.shape
    # The LSTMs operate at the embedding width.
    feature_size = embed_size

    embed = L.EmbedID(in_size=vocab_size, out_size=embed_size,
                      initialW=embeddings)
    forward_lstm = LSTM(feature_size, feature_size, dropout)
    backward_lstm = LSTM(feature_size, feature_size, dropout)
    classifier = L.Linear(feature_size * 2, n_labels)
    super(BLSTMBase, self).__init__(
        embed=embed,
        f_lstm=forward_lstm,
        b_lstm=backward_lstm,
        linear=classifier,
    )
    self._dropout = dropout
    self._n_labels = n_labels
    self.train = train
def __init__(self):
    """Register two identical conv/fc streams (R and D) and the fused head.

    The streams have the same layer shapes and differ only in the ``R`` /
    ``D`` infix of their names; ``fc8`` is a Bilinear link and ``fc9`` a
    Linear one.
    """
    links = {}
    # Stream R is built completely before stream D.
    for stream in ('R', 'D'):
        links['conv{}1'.format(stream)] = L.Convolution2D(3, 96, 11, stride=4)
        links['conv{}2'.format(stream)] = L.Convolution2D(96, 256, 5, pad=2)
        links['conv{}3'.format(stream)] = L.Convolution2D(256, 384, 3, pad=1)
        links['conv{}4'.format(stream)] = L.Convolution2D(384, 384, 3, pad=1)
        links['conv{}5'.format(stream)] = L.Convolution2D(384, 256, 3, pad=1)
        links['fc{}6'.format(stream)] = L.Linear(9216, 4096)
        links['fc{}7'.format(stream)] = L.Linear(4096, 4096)
    links['fc8'] = L.Bilinear(4096, 4096, 4096)
    links['fc9'] = L.Linear(4096, 1000)
    super(MDL_full, self).__init__(**links)
    self.train = True
def __init__(self, n, h, in_size, in_channels, embed_size, block_size):
    """Register the stem convolution, the final linear and the conv blocks.

    Args:
        n: Base channel count; block ``i`` works at ``(i + 1) * n`` channels.
        h: Output size of the linear link ``ln``.
        in_size, embed_size: ``n_blocks`` is
            ``int(log2(in_size / embed_size)) + 1``.
        in_channels: Input channels of the stem convolution ``l0``.
        block_size: Number of convolutions registered per block.
    """
    super().__init__(
        l0=L.Convolution2D(in_channels, n, 3, stride=1, pad=1),
        ln=L.Linear(None, h))
    self.n_blocks = int(log2(in_size / embed_size)) + 1
    self.block_size = block_size

    last_block = self.n_blocks - 1
    for block in range(self.n_blocks):
        width = (block + 1) * n
        # Every block but the last widens by n on its final convolution.
        if block < last_block:
            next_width = (block + 2) * n
        else:
            next_width = width
        base = block * block_size
        for offset in range(block_size - 1):
            self.add_link('c{}'.format(base + offset),
                          L.Convolution2D(width, width, 3, stride=1, pad=1))
        self.add_link('c{}'.format(base + block_size - 1),
                      L.Convolution2D(width, next_width, 3, stride=1, pad=1))
def __init__(self, d, f, R, gpu):
    """Build the ``g`` / ``H`` / ``W`` link lists, the model and its optimizer.

    Args:
        d: Output width of each ``W`` link; stored on the instance.
        f: Width of the ``g`` outputs and of the ``H`` links; stored.
        R: Number of ``H`` links; ``W`` has ``R + 1`` links; stored.
        gpu: When truthy, the model is moved to GPU device 0; stored.
    """
    self.d = d
    self.f = f
    self.R = R
    self.gpu = gpu

    g_links = ChainList(
        *[L.Linear(1, f) for _ in six.moves.range(AtomIdMax)])
    h_links = ChainList(*[L.Linear(f, f) for _ in six.moves.range(R)])
    w_links = ChainList(*[L.Linear(f, d) for _ in six.moves.range(R + 1)])

    self.optimizer = optimizers.Adam()
    self.model = Chain(H=h_links, W=w_links, g=g_links)
    if gpu:
        self.model.to_gpu(0)
    self.optimizer.setup(self.model)

    # Three independent pairs of empty lists.
    self.to = [[], []]
    self.atom_sid = [[], []]
    self.anum = [[], []]