def reset_parameters(self):
"""
Initialize parameters following the scheme proposed in the paper.
"""
# The input-to-hidden weight matrix is initialized orthogonally.
init.orthogonal(self.weight_ih.data)
# The hidden-to-hidden weight matrix is initialized as an identity
# matrix.
weight_hh_data = torch.eye(self.hidden_size)
weight_hh_data = weight_hh_data.repeat(1, 4)
self.weight_hh.data.set_(weight_hh_data)
# The bias is just set to zero vectors.
init.constant(self.bias.data, val=0)
# Initialization of BN parameters.
self.bn_ih.reset_parameters()
self.bn_hh.reset_parameters()
self.bn_c.reset_parameters()
self.bn_ih.bias.data.fill_(0)
self.bn_hh.bias.data.fill_(0)
self.bn_ih.weight.data.fill_(0.1)
self.bn_hh.weight.data.fill_(0.1)
self.bn_c.weight.data.fill_(0.1)
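# The snippets on this page use the pre-0.4 torch.nn.init names (orthogonal,
# constant, ...); current PyTorch spells the in-place initializers with a
# trailing underscore. A minimal sketch of the same LSTM-cell initialization
# with the newer names, using stand-in shapes rather than the attributes of the
# class above:
import torch
from torch import nn
from torch.nn import init

hidden_size, input_size = 8, 8
weight_ih = nn.Parameter(torch.empty(input_size, 4 * hidden_size))
weight_hh = nn.Parameter(torch.empty(hidden_size, 4 * hidden_size))
bias = nn.Parameter(torch.empty(4 * hidden_size))

init.orthogonal_(weight_ih)                               # orthogonal input-to-hidden weights
with torch.no_grad():
    weight_hh.copy_(torch.eye(hidden_size).repeat(1, 4))  # identity block for each of the four gates
init.constant_(bias, 0.0)                                 # zero biases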
def __init__(self, vocab_dict, dropout_rate, embed_dim, hidden_dim, bidirectional=True):
super(AoAReader, self).__init__()
self.vocab_dict = vocab_dict
self.hidden_dim = hidden_dim
self.embed_dim = embed_dim
self.dropout_rate = dropout_rate
self.embedding = nn.Embedding(vocab_dict.size(),
self.embed_dim,
padding_idx=Constants.PAD)
self.embedding.weight.data.uniform_(-0.05, 0.05)
input_size = self.embed_dim
self.gru = nn.GRU(input_size, hidden_size=self.hidden_dim, dropout=dropout_rate,
bidirectional=bidirectional, batch_first=True)
# try independent gru
#self.query_gru = nn.GRU(input_size, hidden_size=self.hidden_dim, dropout=dropout_rate,
# bidirectional=bidirectional, batch_first=True)
for weight in self.gru.parameters():
if len(weight.size()) > 1:
init.orthogonal(weight.data)
def reset_parameters(self):
if self.use_leaf_rnn:
init.kaiming_normal(self.leaf_rnn_cell.weight_ih.data)
init.orthogonal(self.leaf_rnn_cell.weight_hh.data)
init.constant(self.leaf_rnn_cell.bias_ih.data, val=0)
init.constant(self.leaf_rnn_cell.bias_hh.data, val=0)
# Set forget bias to 1
self.leaf_rnn_cell.bias_ih.data.chunk(4)[1].fill_(1)
if self.bidirectional:
init.kaiming_normal(self.leaf_rnn_cell_bw.weight_ih.data)
init.orthogonal(self.leaf_rnn_cell_bw.weight_hh.data)
init.constant(self.leaf_rnn_cell_bw.bias_ih.data, val=0)
init.constant(self.leaf_rnn_cell_bw.bias_hh.data, val=0)
# Set forget bias to 1
self.leaf_rnn_cell_bw.bias_ih.data.chunk(4)[1].fill_(1)
else:
init.kaiming_normal(self.word_linear.weight.data)
init.constant(self.word_linear.bias.data, val=0)
self.treelstm_layer.reset_parameters()
init.normal(self.comp_query.data, mean=0, std=0.01)
def _initialize_weights(self):
init.orthogonal(self.conv1.weight, init.calculate_gain('relu'))
init.orthogonal(self.conv2.weight, init.calculate_gain('relu'))
init.orthogonal(self.conv3.weight, init.calculate_gain('relu'))
init.orthogonal(self.conv4.weight)
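# init.calculate_gain('relu') evaluates to sqrt(2), so the three ReLU-followed
# convolutions above receive orthogonal weights scaled by about 1.414, while the
# final convolution keeps the default gain of 1. A small check of the gain and
# of the scaled-orthogonality property, written with the current underscore API
# and an arbitrary conv-shaped tensor:
import math
import torch
from torch.nn import init

gain = init.calculate_gain('relu')
assert math.isclose(gain, math.sqrt(2))
w = init.orthogonal_(torch.empty(64, 32, 3, 3), gain=gain)
flat = w.view(64, -1)                                     # rows are orthogonal up to the gain
print(torch.allclose(flat @ flat.t(), gain ** 2 * torch.eye(64), atol=1e-5))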
def __init__(self, observation_space, non_rgb_rgb_state_size, action_space,
hidden_size):
super(ActorCritic, self).__init__()
self.rgb_state_size = (6, 128, 128)
self.action_size = 5
self.relu = nn.ReLU(inplace=True)
self.softmax = nn.Softmax()
# the architecture is adapted from Sim2Real (Rusu et al., 2016)
self.conv1 = nn.Conv2d(
self.rgb_state_size[0], 16, 8, stride=4, padding=1)
self.conv2 = nn.Conv2d(16, 32, 5, stride=2)
self.fc1 = nn.Linear(1152 + non_rgb_rgb_state_size, hidden_size)
self.lstm = nn.LSTMCell(hidden_size, hidden_size)
self.fc_actor1 = nn.Linear(hidden_size, self.action_size)
self.fc_actor2 = nn.Linear(hidden_size, self.action_size)
self.fc_actor3 = nn.Linear(hidden_size, self.action_size)
self.fc_actor4 = nn.Linear(hidden_size, self.action_size)
self.fc_actor5 = nn.Linear(hidden_size, self.action_size)
self.fc_actor6 = nn.Linear(hidden_size, self.action_size)
self.fc_critic = nn.Linear(hidden_size, 1)
# Orthogonal weight initialisation
for name, p in self.named_parameters():
if 'weight' in name:
init.orthogonal(p)
elif 'bias' in name:
init.constant(p, 0)
def __init__(self, orthogonal_gain=1.):
super(OrthogonalWeightsZeroBias, self)\
.__init__(weight_initializer=partial(init.orthogonal, gain=orthogonal_gain),
bias_initializer=Constant(0.))
def init_gru(cell, gain=1):
cell.reset_parameters()
# orthogonal initialization of recurrent weights
for _, hh, _, _ in cell.all_weights:
for i in range(0, hh.size(0), cell.hidden_size):
I.orthogonal(hh[i:i + cell.hidden_size], gain=gain)
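# cell.all_weights groups each layer's parameters as [weight_ih, weight_hh,
# bias_ih, bias_hh], and weight_hh is stacked as (3 * hidden_size, hidden_size)
# for the three GRU gates, so the loop above gives every gate its own square
# orthogonal block. A usage sketch, assuming `I` is torch.nn.init imported under
# that alias:
import torch.nn as nn

gru = nn.GRU(input_size=16, hidden_size=32, num_layers=2, batch_first=True)
init_gru(gru)  # per-gate orthogonal init of the recurrent weights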
def reset_parameters(self):
"""
Initialize parameters following the scheme proposed in the paper.
"""
init.orthogonal(self.weight_ih.data)
weight_hh_data = torch.eye(self.hidden_size)
weight_hh_data = weight_hh_data.repeat(1, 4)
self.weight_hh.data.set_(weight_hh_data)
# The bias is just set to zero vectors.
if self.use_bias:
init.constant(self.bias.data, val=0)
def __init__(self, hidden_size):
super(ActorCritic, self).__init__()
self.state_size = STATE_SIZE[0] * STATE_SIZE[1] * STATE_SIZE[2]
self.elu = nn.ELU(inplace=True)
self.softmax = nn.Softmax()
self.sigmoid = nn.Sigmoid()
# Pass state into model body
self.conv1 = nn.Conv2d(STATE_SIZE[0], 32, 4, stride=2)
self.conv2 = nn.Conv2d(32, 32, 3)
self.fc1 = nn.Linear(1152, hidden_size)
# Pass previous action, reward and timestep directly into LSTM
self.lstm = nn.LSTMCell(hidden_size + ACTION_SIZE + 2, hidden_size)
self.fc_actor1 = nn.Linear(hidden_size, ACTION_SIZE)
self.fc_critic1 = nn.Linear(hidden_size, ACTION_SIZE)
self.fc_actor2 = nn.Linear(hidden_size, ACTION_SIZE)
self.fc_critic2 = nn.Linear(hidden_size, ACTION_SIZE)
self.fc_class = nn.Linear(hidden_size, 1)
# Orthogonal weight initialisation
for name, p in self.named_parameters():
if 'weight' in name:
init.orthogonal(p)
elif 'bias' in name:
init.constant(p, 0)
# Set LSTM forget gate bias to 1
for name, p in self.lstm.named_parameters():
if 'bias' in name:
n = p.size(0)
forget_start_idx, forget_end_idx = n // 4, n // 2
init.constant(p[forget_start_idx:forget_end_idx], 1)
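# PyTorch stacks LSTM gate parameters in the order input, forget, cell, output,
# so the slice [n // 4:n // 2] above covers exactly the forget-gate bias. The
# same idea on a throwaway cell with the current in-place API:
import torch.nn as nn
from torch.nn import init

cell = nn.LSTMCell(32, 32)
for name, p in cell.named_parameters():
    if 'bias' in name:
        n = p.size(0)
        init.constant_(p[n // 4:n // 2], 1.0)  # forget-gate slice of [i, f, g, o]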
def test_orthogonal(self):
for as_variable in [True, False]:
for use_gain in [True, False]:
for tensor_size in [[3, 4], [4, 3], [20, 2, 3, 4], [2, 3, 4, 5]]:
input_tensor = torch.zeros(tensor_size)
gain = 1.0
if as_variable:
input_tensor = Variable(input_tensor)
if use_gain:
gain = self._random_float(0.1, 2)
init.orthogonal(input_tensor, gain=gain)
else:
init.orthogonal(input_tensor)
if as_variable:
input_tensor = input_tensor.data
rows, cols = tensor_size[0], reduce(mul, tensor_size[1:])
flattened_tensor = input_tensor.view(rows, cols)
if rows > cols:
self.assertEqual(torch.mm(flattened_tensor.t(), flattened_tensor),
torch.eye(cols) * gain ** 2, prec=1e-6)
else:
self.assertEqual(torch.mm(flattened_tensor, flattened_tensor.t()),
torch.eye(rows) * gain ** 2, prec=1e-6)
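# The assertion above checks semi-orthogonality: after flattening to
# (rows, cols), whichever Gram matrix is smaller (Q^T Q or Q Q^T) equals
# gain ** 2 times the identity. A standalone check of the same property:
import torch
from torch.nn import init

q = init.orthogonal_(torch.empty(5, 3), gain=2.0)  # tall matrix: columns orthogonal
print(torch.allclose(q.t() @ q, 4.0 * torch.eye(3), atol=1e-5))  # gain ** 2 * I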
def orthogonal(w, gain=1):
    # Thin wrapper around the torch.nn.init orthogonal initializer.
    return init.orthogonal(w, gain=gain)
def initWeights(net, scheme='orthogonal'):
print('Initializing weights. Warning: may overwrite sensitive bias parameters (e.g. batchnorm)')
for e in net.parameters():
if scheme == 'orthogonal':
if len(e.size()) >= 2:
init.orthogonal(e)
elif scheme == 'normal':
init.normal(e, std=1e-2)
elif scheme == 'xavier':
init.xavier_normal(e)
def weights_init_orthogonal(m):
classname = m.__class__.__name__
print(classname)
if classname.find('Conv') != -1:
init.orthogonal(m.weight.data, gain=1)
elif classname.find('Linear') != -1:
init.orthogonal(m.weight.data, gain=1)
elif classname.find('BatchNorm2d') != -1:
init.normal(m.weight.data, 1.0, 0.02)
init.constant(m.bias.data, 0.0)
def init_weights(net, init_type='normal'):
print('initialization method [%s]' % init_type)
if init_type == 'normal':
net.apply(weights_init_normal)
elif init_type == 'xavier':
net.apply(weights_init_xavier)
elif init_type == 'kaiming':
net.apply(weights_init_kaiming)
elif init_type == 'orthogonal':
net.apply(weights_init_orthogonal)
else:
raise NotImplementedError('initialization method [%s] is not implemented' % init_type)
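# A usage sketch for the dispatcher above; weights_init_normal,
# weights_init_xavier and weights_init_kaiming are assumed to be defined
# alongside weights_init_orthogonal, and the model is a throwaway stand-in:
import torch.nn as nn

model = nn.Sequential(nn.Conv2d(3, 16, 3), nn.BatchNorm2d(16), nn.ReLU())
init_weights(model, init_type='orthogonal')  # applies weights_init_orthogonal to every submodule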
def reset_parameters(self):
"""
Initialize parameters TO DO
"""
init.uniform(self.thetaA, a=-0.1, b=0.1)
init.uniform(self.thetaB, a=-0.1, b=0.1)
init.uniform(self.U, a=-0.1, b=0.1)
init.orthogonal(self.gate_U.data)
gate_W_data = torch.eye(self.hidden_size)
gate_W_data = gate_W_data.repeat(1, 2)
self.gate_W.data.set_(gate_W_data)
init.constant(self.bias.data, val=0)
init.constant(self.gate_bias.data, val=0)
def __init__(self, frame_size, n_frame_samples, n_rnn, dim,
learn_h0, weight_norm):
super().__init__()
self.frame_size = frame_size
self.n_frame_samples = n_frame_samples
self.dim = dim
h0 = torch.zeros(n_rnn, dim)
if learn_h0:
self.h0 = torch.nn.Parameter(h0)
else:
self.register_buffer('h0', torch.autograd.Variable(h0))
self.input_expand = torch.nn.Conv1d(
in_channels=n_frame_samples,
out_channels=dim,
kernel_size=1
)
init.kaiming_uniform(self.input_expand.weight)
init.constant(self.input_expand.bias, 0)
if weight_norm:
self.input_expand = torch.nn.utils.weight_norm(self.input_expand)
self.rnn = torch.nn.GRU(
input_size=dim,
hidden_size=dim,
num_layers=n_rnn,
batch_first=True
)
for i in range(n_rnn):
nn.concat_init(
getattr(self.rnn, 'weight_ih_l{}'.format(i)),
[nn.lecun_uniform, nn.lecun_uniform, nn.lecun_uniform]
)
init.constant(getattr(self.rnn, 'bias_ih_l{}'.format(i)), 0)
nn.concat_init(
getattr(self.rnn, 'weight_hh_l{}'.format(i)),
[nn.lecun_uniform, nn.lecun_uniform, init.orthogonal]
)
init.constant(getattr(self.rnn, 'bias_hh_l{}'.format(i)), 0)
self.upsampling = nn.LearnedUpsampling1d(
in_channels=dim,
out_channels=dim,
kernel_size=frame_size
)
init.uniform(
self.upsampling.conv_t.weight, -np.sqrt(6 / dim), np.sqrt(6 / dim)
)
init.constant(self.upsampling.bias, 0)
if weight_norm:
self.upsampling.conv_t = torch.nn.utils.weight_norm(
self.upsampling.conv_t
)
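# In this snippet `nn` refers to the project's own helper module (supplying
# lecun_uniform, concat_init and LearnedUpsampling1d), not torch.nn. A hedged
# sketch of what a concat_init-style helper of this shape could look like,
# assuming it splits the stacked GRU weight along dim 0 and applies one
# initializer per chunk; the real helper may differ:
import torch

def concat_init_sketch(tensor, inits):
    chunk = tensor.size(0) // len(inits)
    with torch.no_grad():
        for i, fn in enumerate(inits):
            fn(tensor[i * chunk:(i + 1) * chunk])  # e.g. one init per GRU gate
    return tensor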