def __init__(self, embedding_tokens):
super(Net, self).__init__()
question_features = 1024
vision_features = config.output_features
glimpses = 2
self.text = TextProcessor(
embedding_tokens=embedding_tokens,
embedding_features=300,
lstm_features=question_features,
drop=0.5,
)
self.attention = Attention(
v_features=vision_features,
q_features=question_features,
mid_features=512,
        glimpses=glimpses,
drop=0.5,
)
self.classifier = Classifier(
in_features=glimpses * vision_features + question_features,
mid_features=1024,
out_features=config.max_answers,
drop=0.5,
)
    for m in self.modules():
        if isinstance(m, (nn.Linear, nn.Conv2d)):
            init.xavier_uniform_(m.weight)
            if m.bias is not None:
                m.bias.data.zero_()
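# For reference, xavier_uniform_ draws from U(-a, a) with
# a = gain * sqrt(6 / (fan_in + fan_out)), which keeps the activation
# variance roughly constant across layers. A minimal, self-contained sanity
# check (the layer sizes are arbitrary):
import math
import torch.nn as nn
from torch.nn import init

lin = nn.Linear(256, 128)                      # fan_in=256, fan_out=128
init.xavier_uniform_(lin.weight)               # default gain = 1.0
bound = math.sqrt(6.0 / (256 + 128))
assert lin.weight.abs().max().item() <= bound  # all samples lie inside the bound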
def __init__(self, embedding_tokens, embedding_features, lstm_features, drop=0.0):
super(TextProcessor, self).__init__()
self.embedding = nn.Embedding(embedding_tokens, embedding_features, padding_idx=0)
self.drop = nn.Dropout(drop)
self.tanh = nn.Tanh()
self.lstm = nn.LSTM(input_size=embedding_features,
hidden_size=lstm_features,
num_layers=1)
self.features = lstm_features
self._init_lstm(self.lstm.weight_ih_l0)
self._init_lstm(self.lstm.weight_hh_l0)
self.lstm.bias_ih_l0.data.zero_()
self.lstm.bias_hh_l0.data.zero_()
    init.xavier_uniform_(self.embedding.weight)
def _init_lstm(self, weight):
    # weight_ih/weight_hh stack the four LSTM gate matrices along dim 0,
    # so initialise each gate's block separately
    for w in weight.chunk(4, 0):
        init.xavier_uniform_(w)
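# Why chunk(4, 0) works: PyTorch stores the four LSTM gates stacked along
# dim 0, so weight_ih_l0 has shape (4*hidden_size, input_size) and each chunk
# is a single gate's weight matrix. A minimal sketch (sizes are illustrative):
import torch.nn as nn
from torch.nn import init

lstm = nn.LSTM(input_size=300, hidden_size=1024, num_layers=1)
for gate in lstm.weight_ih_l0.chunk(4, 0):  # input, forget, cell, output
    assert gate.shape == (1024, 300)        # one gate's matrix at a time
    init.xavier_uniform_(gate)              # per-gate Xavier, as in _init_lstm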
def initWeight(self, init_forget_bias=1):
    # See details in https://github.com/pytorch/pytorch/blob/master/torch/nn/modules/rnn.py
    for name, params in self.named_parameters():
        # Xavier-initialise every weight matrix
        if 'weight' in name:
            init.xavier_uniform_(params)
        # set the GRU update-gate biases (b_iz, b_hz) to init_forget_bias,
        # so the network initially tends to keep its hidden state
        elif 'gru.bias_ih_l' in name:
            b_ir, b_iz, b_in = params.chunk(3, 0)
            init.constant_(b_iz, init_forget_bias)
        elif 'gru.bias_hh_l' in name:
            b_hr, b_hz, b_hn = params.chunk(3, 0)
            init.constant_(b_hz, init_forget_bias)
        # initialise all remaining biases to zero
        else:
            init.constant_(params, 0)
def initWeight(self, init_forget_bias=1):
    # See https://github.com/pytorch/pytorch/blob/master/torch/nn/modules/rnn.py
    for name, params in self.named_parameters():
        # Xavier-initialise every weight matrix
        if 'weight' in name:
            init.xavier_uniform_(params)
        # set the LSTM forget-gate biases (b_if, b_hf) to init_forget_bias,
        # a common trick to help gradients flow early in training
        elif 'lstm.bias_ih_l' in name:
            b_ii, b_if, b_ig, b_io = params.chunk(4, 0)
            init.constant_(b_if, init_forget_bias)
        elif 'lstm.bias_hh_l' in name:
            b_hi, b_hf, b_hg, b_ho = params.chunk(4, 0)
            init.constant_(b_hf, init_forget_bias)
        # initialise all remaining biases to zero
        else:
            init.constant_(params, 0)
def weights_init(m):
    # He/Kaiming uniform init, scaled by the fan-in of each layer
    if isinstance(m, nn.Conv2d):
        init.kaiming_uniform_(m.weight.data, mode='fan_in')
        # alternative: init.xavier_uniform_(m.weight.data, gain=np.sqrt(2.0))
    if isinstance(m, nn.Linear):
        init.kaiming_uniform_(m.weight.data, mode='fan_in')
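# Typical usage: Module.apply walks the module tree and calls weights_init
# once per submodule, so every Conv2d/Linear above gets initialised. The toy
# model below is purely illustrative; the imports match the snippets on this page.
import torch.nn as nn
from torch.nn import init

net = nn.Sequential(
    nn.Conv2d(3, 8, kernel_size=3),
    nn.ReLU(),
    nn.Flatten(),
    nn.Linear(8 * 30 * 30, 10),  # sized for 32x32 inputs
)
net.apply(weights_init)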
def __init__(self, input_size, width=3, dropout=0.2, nopad=False):
super(GatedConv, self).__init__()
self.conv = WeightNormConv2d(input_size, 2 * input_size,
kernel_size=(width, 1), stride=(1, 1),
padding=(width // 2 * (1 - nopad), 0))
    init.xavier_uniform_(self.conv.weight, gain=(4 * (1 - dropout))**0.5)
self.dropout = nn.Dropout(dropout)
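# The gain (4 * (1 - dropout))**0.5 matches the ConvS2S initialisation recipe
# (Gehring et al., 2017): the conv emits 2*input_size channels that a gated
# linear unit halves again, and the gain compensates the variance lost to the
# gate and to dropout. The forward pass is not shown in this snippet; a
# hypothetical GLU step over the conv output would look like this
# (equivalently, torch.nn.functional.glu(out, dim=1)):
import torch

def glu(out):
    val, gate = out.chunk(2, dim=1)   # split the doubled channel dim
    return val * torch.sigmoid(gate)  # gate the value half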
def initialize_weights(self):
    conv_layers = [v for k, v in self._modules.items() if 'conv' in k]
    for layer in conv_layers:
        init.xavier_uniform_(layer.weight)
    init.xavier_uniform_(self.head.weight)
    init.xavier_uniform_(self.fc.weight)
def initialize_weights(self):
    conv_layers = [v for k, v in self._modules.items() if 'conv' in k]
    for layer in conv_layers:
        init.xavier_uniform_(layer.weight)
    init.xavier_uniform_(self.head.weight)
def xavier(param):
    init.xavier_uniform_(param)
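# This one-liner is typically used from an apply-style hook; a hypothetical
# companion (the surrounding file is not shown in this snippet):
import torch.nn as nn

def conv_weights_init(m):
    if isinstance(m, nn.Conv2d):
        xavier(m.weight.data)  # delegate to the helper above
        m.bias.data.zero_()
# net.apply(conv_weights_init) would then initialise every Conv2d in net.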
def initNetParams(net):
    '''Init net parameters.'''
    for m in net.modules():
        if isinstance(m, nn.Conv2d):
            init.xavier_uniform_(m.weight)
            if m.bias is not None:
                init.constant_(m.bias, 0)
        elif isinstance(m, nn.BatchNorm2d):
            init.constant_(m.weight, 1)
            init.constant_(m.bias, 0)
        elif isinstance(m, nn.Linear):
            init.normal_(m.weight, std=1e-3)
            if m.bias is not None:
                init.constant_(m.bias, 0)
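# Usage sketch: initNetParams mutates the network in place, so call it once
# right after construction. The toy model is purely illustrative; bias=False
# on the conv exercises the `is not None` check above.
import torch.nn as nn

net = nn.Sequential(
    nn.Conv2d(3, 16, kernel_size=3, bias=False),
    nn.BatchNorm2d(16),
    nn.ReLU(),
    nn.Flatten(),
    nn.Linear(16 * 30 * 30, 10),  # sized for 32x32 inputs
)
initNetParams(net)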
def __init__(self, input_dim, output_dim, dropout=0, softmax_boost=1.0):
super(ProposalUniformDiscrete, self).__init__()
self.lin1 = nn.Linear(input_dim, input_dim)
self.lin2 = nn.Linear(input_dim, output_dim)
self.drop = nn.Dropout(dropout)
self.softmax_boost = softmax_boost
    init.xavier_uniform_(self.lin1.weight, gain=init.calculate_gain('relu'))
    init.xavier_uniform_(self.lin2.weight)
def __init__(self, input_dim, dropout=0):
super(ProposalNormal, self).__init__()
self.lin1 = nn.Linear(input_dim, input_dim)
self.lin2 = nn.Linear(input_dim, 2)
self.drop = nn.Dropout(dropout)
    init.xavier_uniform_(self.lin1.weight, gain=init.calculate_gain('relu'))
    init.xavier_uniform_(self.lin2.weight)
def __init__(self, input_dim, dropout=0):
super(ProposalLaplace, self).__init__()
self.lin1 = nn.Linear(input_dim, input_dim)
self.lin2 = nn.Linear(input_dim, 2)
self.drop = nn.Dropout(dropout)
    init.xavier_uniform_(self.lin1.weight, gain=init.calculate_gain('relu'))
    init.xavier_uniform_(self.lin2.weight)
def __init__(self, input_dim, dropout=0, softmax_boost=1.0):
super(ProposalFlip, self).__init__()
self.lin1 = nn.Linear(input_dim, input_dim)
self.lin2 = nn.Linear(input_dim, 1)
self.drop = nn.Dropout(dropout)
self.softmax_boost = softmax_boost
    init.xavier_uniform_(self.lin1.weight, gain=init.calculate_gain('relu'))
    init.xavier_uniform_(self.lin2.weight)
def __init__(self, input_dim, output_dim, dropout=0, softmax_boost=1.0):
super(ProposalDiscrete, self).__init__()
self.lin1 = nn.Linear(input_dim, input_dim)
self.lin2 = nn.Linear(input_dim, output_dim)
self.drop = nn.Dropout(dropout)
self.softmax_boost = softmax_boost
    init.xavier_uniform_(self.lin1.weight, gain=init.calculate_gain('relu'))
def __init__(self, input_dim, dropout=0, softplus_boost=1.0):
super(ProposalUniformContinuous, self).__init__()
self.lin1 = nn.Linear(input_dim, input_dim)
self.lin2 = nn.Linear(input_dim, 2)
self.drop = nn.Dropout(dropout)
self.softplus_boost = softplus_boost
    init.xavier_uniform_(self.lin1.weight, gain=init.calculate_gain('relu'))
    init.xavier_uniform_(self.lin2.weight)
def __init__(self, input_dim, mixture_components=10, dropout=0):
super(ProposalUniformContinuousAlt, self).__init__()
self.mixture_components = mixture_components
self.output_dim = 3 * mixture_components
self.lin1 = nn.Linear(input_dim, input_dim)
self.lin2 = nn.Linear(input_dim, self.output_dim)
self.drop = nn.Dropout(dropout)
    init.xavier_uniform_(self.lin1.weight, gain=init.calculate_gain('relu'))
    init.xavier_uniform_(self.lin2.weight)
def __init__(self, input_dim, dropout=0, softplus_boost=1.0):
super(ProposalGamma, self).__init__()
self.lin1 = nn.Linear(input_dim, input_dim)
self.lin2 = nn.Linear(input_dim, 2)
self.drop = nn.Dropout(dropout)
self.softplus_boost = softplus_boost
    init.xavier_uniform_(self.lin1.weight, gain=init.calculate_gain('relu'))
    init.xavier_uniform_(self.lin2.weight)
def initializationhelper(param, nltype):
    # small symmetric uniform init for both weight and bias; the Xavier
    # variant below is kept for reference
    c = 0.1
    torchinit.uniform_(param.weight, a=-c, b=c)
    # torchinit.xavier_uniform_(param.weight, gain=c*torchinit.calculate_gain(nltype))
    torchinit.uniform_(param.bias, a=-c, b=c)
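# Usage sketch (assuming torchinit is torch.nn.init, which the calls above
# suggest): any module exposing .weight and .bias works; nltype is only
# consumed by the commented-out Xavier variant's gain lookup.
import torch.nn as nn
import torch.nn.init as torchinit

lin = nn.Linear(64, 64)
initializationhelper(lin, 'relu')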