def __init__(self):
    """Assemble the MNIST network.

    Registers, in this order: ``feats`` (four convolutions, each followed
    by ReLU and batch norm, the first and third also by 2x2 max pooling),
    ``classifier`` (a 1x1 convolution from 128 to 10 channels),
    ``avgpool`` and ``dropout``.
    """
    super(mnist_model, self).__init__()

    # Layers are instantiated in execution order so the Sequential indices
    # (and the order parameters are initialised) match the layout below.
    feature_layers = [
        nn.Conv2d(1, 32, kernel_size=5, stride=1, padding=1),
        nn.MaxPool2d(kernel_size=2, stride=2),
        nn.ReLU(inplace=True),
        nn.BatchNorm2d(32),
        nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1),
        nn.ReLU(inplace=True),
        nn.BatchNorm2d(64),
        nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=1),
        nn.MaxPool2d(kernel_size=2, stride=2),
        nn.ReLU(inplace=True),
        nn.BatchNorm2d(64),
        nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1),
        nn.ReLU(inplace=True),
        nn.BatchNorm2d(128),
    ]
    self.feats = nn.Sequential(*feature_layers)
    self.classifier = nn.Conv2d(128, 10, kernel_size=1)
    self.avgpool = nn.AvgPool2d(kernel_size=6, stride=6)
    self.dropout = nn.Dropout(p=0.5)
python类nn()的实例源码
def __init__(self, input_size, feature_size = 128, hidden_size = 256, num_layers = 1, dropout = 0.9):
    """Build a bidirectional LSTM followed by a linear feature projection.

    Args:
        input_size: size of each input step fed to the LSTM.
        feature_size: output size of the ``feature`` linear layer.
        hidden_size: LSTM hidden size (per direction).
        num_layers: number of stacked LSTM layers.
        dropout: dropout value forwarded to ``nn.LSTM``.
    """
    super(SeqEncoder, self).__init__()
    self.hidden_size = hidden_size
    self.num_layers = num_layers

    # Recurrent part: batch-first, bidirectional LSTM.
    lstm_options = dict(
        input_size=input_size,
        hidden_size=hidden_size,
        num_layers=num_layers,
        batch_first=True,
        dropout=dropout,
        bidirectional=True,
    )
    self.rnn = nn.LSTM(**lstm_options)
    self.rnn.apply(weights_init)

    # Feature head: its input is twice the hidden size because the LSTM
    # is bidirectional.
    self.feature = nn.Linear(hidden_size * 2, feature_size)
    self.feature.apply(weights_init)
def __init__(self, bn=False):
    """Build the three convolutional columns and the 1x1 fusion layer.

    Args:
        bn: forwarded as ``bn=`` to every project ``Conv2d`` wrapper.
    """
    super(MCNN, self).__init__()

    def make_column(widths, kernels):
        # One column: conv, pool, conv, pool, conv, conv (single input channel).
        w1, w2, w3, w4 = widths
        k1, k2, k3, k4 = kernels
        return nn.Sequential(
            Conv2d(1, w1, k1, same_padding=True, bn=bn),
            nn.MaxPool2d(2),
            Conv2d(w1, w2, k2, same_padding=True, bn=bn),
            nn.MaxPool2d(2),
            Conv2d(w2, w3, k3, same_padding=True, bn=bn),
            Conv2d(w3, w4, k4, same_padding=True, bn=bn),
        )

    self.branch1 = make_column((16, 32, 16, 8), (9, 7, 7, 7))
    self.branch2 = make_column((20, 40, 20, 10), (7, 5, 5, 5))
    self.branch3 = make_column((24, 48, 24, 12), (5, 3, 3, 3))

    # 30 input channels = 8 + 10 + 12, the final widths of the three columns.
    self.fuse = nn.Sequential(Conv2d(30, 1, 1, same_padding=True, bn=bn))
def __init__(self, config):
    """Create the embedding, basket pooling function and recurrent layer.

    Args:
        config: object providing ``num_product``, ``embedding_dim``,
            ``basket_pool_type`` ('avg' or 'max'), ``rnn_type`` ('LSTM',
            'GRU', 'RNN_TANH' or 'RNN_RELU'), ``rnn_layer_num`` and
            ``dropout``.

    Raises:
        KeyError: if ``basket_pool_type`` or a non-LSTM/GRU ``rnn_type``
            is not one of the keys listed above.
    """
    super(DreamModel, self).__init__()
    self.config = config

    # Item embedding layer; index 0 is the padding index.
    self.encode = torch.nn.Embedding(config.num_product,
                                     config.embedding_dim,
                                     padding_idx=0)

    # Basket pooling function selected by name.
    pooling_by_name = {'avg': pool_avg, 'max': pool_max}
    self.pool = pooling_by_name[config.basket_pool_type]

    # Recurrent layer: input and hidden size are both embedding_dim.
    if config.rnn_type in ['LSTM', 'GRU']:
        rnn_class = getattr(torch.nn, config.rnn_type)
        self.rnn = rnn_class(config.embedding_dim,
                             config.embedding_dim,
                             config.rnn_layer_num,
                             batch_first=True,
                             dropout=config.dropout)
    else:
        activation_by_type = {'RNN_TANH': 'tanh', 'RNN_RELU': 'relu'}
        nonlinearity = activation_by_type[config.rnn_type]
        self.rnn = torch.nn.RNN(config.embedding_dim,
                                config.embedding_dim,
                                config.rnn_layer_num,
                                nonlinearity=nonlinearity,
                                batch_first=True,
                                dropout=config.dropout)
def forward(self, x, lengths, hidden):
    """Encode each user's baskets and run the sequence through the RNN.

    Args:
        x: nested lists indexed as user -> basket -> product index.
        lengths: sequence lengths passed to ``pack_padded_sequence``.
        hidden: initial hidden state passed to ``self.rnn``.

    Returns:
        Tuple ``(dynamic_user, h_u)``: the padded RNN outputs
        (batch-first) and the final hidden state returned by the RNN.
    """
    user_sequences = []
    for user in x:
        pooled_baskets = []
        for basket in user:
            # One row holding this basket's product indices.
            indices = torch.LongTensor(basket).resize_(1, len(basket))
            if self.config.cuda:
                indices = indices.cuda()
            embedded = self.encode(torch.autograd.Variable(indices))
            pooled_baskets.append(self.pool(embedded, dim = 1))
        # Concatenate this user's pooled baskets along dimension 1.
        user_sequences.append(torch.cat(pooled_baskets, 1))

    # Stack all users along the batch dimension.
    rnn_input = torch.cat(user_sequences, 0)
    if self.config.cuda:
        rnn_input = rnn_input.cuda()
    packed_input = torch.nn.utils.rnn.pack_padded_sequence(rnn_input, lengths, batch_first=True)

    output, h_u = self.rnn(packed_input, hidden)
    dynamic_user, _ = torch.nn.utils.rnn.pad_packed_sequence(output, batch_first=True)
    return dynamic_user, h_u
def __init__(self, batch_size, word_gru_hidden, feature_dim, n_classes, bidirectional=True):
    """Create the final linear layer of the mixture classifier.

    The passed ``word_gru_hidden`` is overridden with 0 (feature-only
    model), so the linear layer's input size reduces to ``feature_dim``.
    """
    super(MixtureSoftmax, self).__init__()

    # Feature-only model: ignore the recurrent width supplied by the caller.
    word_gru_hidden = 0

    self.batch_size = batch_size
    self.n_classes = n_classes
    self.word_gru_hidden = word_gru_hidden
    self.feature_dim = feature_dim

    # Two sentence encodings, each 1x or 2x the hidden size by direction count.
    directions = 2 if bidirectional == True else 1
    self.linear = nn.Linear(2 * directions * word_gru_hidden + feature_dim, n_classes)
def __init__(self, batch_size, word_gru_hidden, feature_dim, n_classes, bidirectional=True):
    """Create the final linear layer of the mixture classifier.

    The passed ``word_gru_hidden`` is overridden with 0 (feature-only
    model), so the linear layer's input size reduces to ``feature_dim``.
    """
    super(MixtureSoftmax, self).__init__()

    # Feature-only model: ignore the recurrent width supplied by the caller.
    word_gru_hidden = 0

    self.batch_size = batch_size
    self.n_classes = n_classes
    self.word_gru_hidden = word_gru_hidden
    self.feature_dim = feature_dim

    # Two sentence encodings, each 1x or 2x the hidden size by direction count.
    directions = 2 if bidirectional == True else 1
    self.linear = nn.Linear(2 * directions * word_gru_hidden + feature_dim, n_classes)
def __init__(self, in_planes, cardinality=32, bottleneck_width=4, stride=1):
    """Build a grouped-convolution bottleneck block with a shortcut path.

    Args:
        in_planes: number of input channels.
        cardinality: number of groups in the 3x3 convolution.
        bottleneck_width: channels per group.
        stride: stride of the 3x3 convolution and of the shortcut projection.
    """
    super(Block, self).__init__()
    width = cardinality * bottleneck_width
    out_planes = self.expansion * width

    self.conv1 = nn.Conv2d(in_planes, width, kernel_size=1, bias=False)
    self.bn1 = nn.BatchNorm2d(width)
    self.conv2 = nn.Conv2d(width, width, kernel_size=3, stride=stride,
                           padding=1, groups=cardinality, bias=False)
    self.bn2 = nn.BatchNorm2d(width)
    self.conv3 = nn.Conv2d(width, out_planes, kernel_size=1, bias=False)
    self.bn3 = nn.BatchNorm2d(out_planes)

    # Identity shortcut unless the spatial size or channel count changes.
    if stride != 1 or in_planes != out_planes:
        self.shortcut = nn.Sequential(
            nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False),
            nn.BatchNorm2d(out_planes),
        )
    else:
        self.shortcut = nn.Sequential()
def __init__(self, in_channels, n_filters, k_size, stride, padding, bias=True):
    """Create ``cb_unit``: a Conv2d followed by BatchNorm2d.

    ``in_channels`` and ``n_filters`` are coerced with ``int()`` before use.
    """
    super(conv2DBatchNorm, self).__init__()
    n_in = int(in_channels)
    n_out = int(n_filters)
    convolution = nn.Conv2d(n_in, n_out, kernel_size=k_size,
                            padding=padding, stride=stride, bias=bias)
    normalisation = nn.BatchNorm2d(int(n_filters))
    self.cb_unit = nn.Sequential(convolution, normalisation)
def classifyOneImage(model,img_pil,preprocess):
    """Classify one image and return its softmax scores on the CPU.

    Args:
        model: network to evaluate; it is switched to eval mode.
        img_pil: input image handed to ``preprocess``.
        preprocess: callable turning ``img_pil`` into a tensor without a
            batch dimension.

    Returns:
        The softmax of the model output for a batch of one, on the CPU.
        The original CPU path returned the raw model output instead; both
        paths now return softmax scores.
    """
    model.eval()
    img_tensor = preprocess(img_pil)
    img_tensor.unsqueeze_(0)  # add the batch dimension in place
    if use_gpu:
        img_tensor = img_tensor.cuda()
    out = model(Variable(img_tensor))
    # nn.Softmax() is kept without an explicit dim to stay compatible with
    # the PyTorch version this file targets.
    m = nn.Softmax()
    # .cpu() is a no-op for tensors already on the CPU.
    return m(out).cpu()
#method == util.GradType.NAIVE or util.GradType.GUIDED
def _augment_module_post(net: nn.Module, callback_dict: dict) -> (dict, list):
    """Attach ``_backward_hook`` to every non-Sequential submodule of ``net``.

    Args:
        net: module whose submodules receive backward hooks.
        callback_dict: forwarded to each hook as ``callback_dict``.

    Returns:
        ``(vis_param_dict, remove_handles)``. ``vis_param_dict`` starts with
        'layer' and 'index' set to None and 'method' set to
        ``GradType.NAIVE`` and is shared by all hooks. ``remove_handles``
        is a zero-argument function that removes every registered hook
        (note: a callable, not a list as the annotation suggests).
    """
    vis_param_dict = {
        'layer': None,
        'index': None,
        'method': GradType.NAIVE,
    }
    hook_handles = []

    for module_name, module in net.named_modules():
        if module is net or isinstance(module, nn.Sequential):
            continue
        # Hook every remaining layer, in case any of them is needed later.
        hook = partial(_backward_hook, module_name=module_name,
                       callback_dict=callback_dict, vis_param_dict=vis_param_dict)
        hook_handles.append(module.register_backward_hook(hook))

    def remove_handles():
        for handle in hook_handles:
            handle.remove()

    return vis_param_dict, remove_handles
def Occlusion_exp(image,occluding_size,occluding_stride,model,preprocess,classes,groundTruth):
    """Compute an occlusion heatmap for one image.

    A square of zeros with side ``occluding_size`` is moved over the image
    in steps of ``occluding_stride``. Each occluded copy is run through
    ``model`` and the softmax score at index ``groundTruth`` is recorded.

    Args:
        image: array with three dimensions (height, width, channels).
        occluding_size: side length of the zeroed square.
        occluding_stride: step between successive occluder positions.
        model: network to evaluate; it is switched to eval mode.
        preprocess: callable mapping a PIL image to a tensor.
        classes: unused by this function.
        groundTruth: class index whose score is recorded.

    Returns:
        numpy array of shape ``(output_height, output_width)`` holding the
        recorded scores, one per occluder position.
    """
    img = np.copy(image)
    height, width, _ = img.shape
    output_height = int(math.ceil((height-occluding_size)/occluding_stride+1))
    output_width = int(math.ceil((width-occluding_size)/occluding_stride+1))

    ocludedImages = []
    for h in range(output_height):
        for w in range(output_width):
            # Occluder region, clipped to the image bounds.
            h_start = h*occluding_stride
            w_start = w*occluding_stride
            h_end = min(height, h_start + occluding_size)
            w_end = min(width, w_start + occluding_size)

            input_image = copy.copy(img)
            input_image[h_start:h_end,w_start:w_end,:] = 0
            ocludedImages.append(preprocess(Image.fromarray(input_image)))

    # Labels only exist to pair with the images in the dataset; they are
    # never read back.
    L = np.empty(output_height*output_width)
    L.fill(groundTruth)
    L = torch.from_numpy(L)
    tensor_images = torch.stack(ocludedImages)
    dataset = torch.utils.data.TensorDataset(tensor_images,L)
    dataloader = torch.utils.data.DataLoader(dataset,batch_size=5,shuffle=False, num_workers=8)

    heatmap = np.empty(0)
    model.eval()
    m = nn.Softmax()
    for images, _ in dataloader:
        if use_gpu:
            # Only the images are moved to the GPU. The old
            # `labels.cuda(async=True)` call is a syntax error on
            # Python 3.7+ and its result was never used.
            images = images.cuda()
        outputs = m(model(Variable(images)))
        # Always bring results back to the CPU so `outs` is defined on both
        # the GPU and CPU paths (.cpu() is a no-op on CPU tensors).
        outs = outputs.cpu()
        heatmap = np.concatenate((heatmap, outs[:, groundTruth].data.numpy()))
    return heatmap.reshape((output_height, output_width))
def conv_bn(in_planes, out_planes, kernel_size, stride=1, padding=0, bias=False):
    """Return a Conv2d -> BatchNorm2d(eps=1e-3) -> ReLU stack.

    Args:
        in_planes: number of input channels.
        out_planes: number of output channels.
        kernel_size: convolution kernel size.
        stride: convolution stride.
        padding: convolution padding.
        bias: whether the convolution has a bias term. This argument was
            previously accepted but ignored (the convolution was always
            created with ``bias=False``); it is now honoured. The default
            keeps the old behaviour.

    Returns:
        An ``nn.Sequential`` of the three layers.
    """
    return nn.Sequential(
        nn.Conv2d(in_planes, out_planes, kernel_size, stride=stride,
                  padding=padding, bias=bias),
        nn.BatchNorm2d(out_planes, eps=1e-3),
        nn.ReLU()
    )
def _make_layer(self, block, planes, blocks, stride=1,
                batch_norm=True):
    """Stack ``blocks`` instances of ``block`` into one ``nn.Sequential``.

    The first block receives ``stride`` and a downsample module; the
    remaining ones are built with only ``batch_norm`` forwarded.
    ``self.inplanes`` is updated to ``planes * block.expansion``.

    A 1x1 convolution (plus batch norm when ``batch_norm`` is true) is used
    as the downsample module when ``self.shortcut`` is 'C', or when it is
    'B' and the stride or channel count changes; otherwise
    ``PlainDownSample`` is used.
    """
    out_planes = planes * block.expansion
    shape_changes = stride != 1 or self.inplanes != out_planes
    use_projection = self.shortcut == 'C' or (self.shortcut == 'B' and shape_changes)

    if use_projection:
        # The convolution only carries a bias when no batch norm follows it.
        projection = [nn.Conv2d(self.inplanes, out_planes,
                                kernel_size=1, stride=stride, bias=not batch_norm)]
        if batch_norm:
            projection.append(nn.BatchNorm2d(out_planes))
        downsample = nn.Sequential(*projection)
    else:
        downsample = PlainDownSample(self.inplanes, out_planes, stride)

    layers = [block(self.inplanes, planes, stride, downsample, batch_norm)]
    self.inplanes = out_planes
    for _ in range(1, blocks):
        layers.append(block(self.inplanes, planes, batch_norm=batch_norm))
    return nn.Sequential(*layers)
def __init__(self, in_channels=256, out_channels=256, stride=1, cardinality=32):
    """Build the reduce / grouped-conv / expand bottleneck and its shortcut.

    Args:
        in_channels: input channel dimensionality.
        out_channels: output channel dimensionality.
        stride: stride of the 3x3 grouped convolution and of the shortcut
            convolution.
        cardinality: number of convolution groups.
    """
    super(DResNeXtBottleneck, self).__init__()
    mid_channels = out_channels // 2

    def pointwise(n_in, n_out, step=1):
        # 1x1 convolution without bias.
        return nn.Conv2d(n_in, n_out, kernel_size=1, stride=step, padding=0, bias=False)

    self.conv_reduce = pointwise(in_channels, mid_channels)
    self.conv_conv = nn.Conv2d(mid_channels, mid_channels, kernel_size=3, stride=stride,
                               padding=1, groups=cardinality, bias=False)
    self.conv_expand = pointwise(mid_channels, out_channels)

    # The shortcut stays empty unless the channel count changes.
    self.shortcut = nn.Sequential()
    if in_channels != out_channels:
        self.shortcut.add_module('shortcut_conv',
                                 pointwise(in_channels, out_channels, step=stride))
def def_netF():
    """Return a frozen VGG19 whose classifier keeps only its first two layers.

    Weights are loaded from the file 'vgg19.pth' and every parameter has
    ``requires_grad`` set to False.
    """
    network = M.vgg19()
    state = torch.load('vgg19.pth')
    network.load_state_dict(state)

    # Keep only the first two children of the original classifier.
    kept_layers = list(network.classifier.children())[:2]
    network.classifier = nn.Sequential(*kept_layers)

    for weight in network.parameters():
        weight.requires_grad = False
    return network
def __init__(self, mode, anchors=9, classes=80, depth=4,
             base_activation=F.relu,
             output_activation=F.sigmoid):
    """Build a sub-network: ``depth`` 3x3 convs plus one output conv.

    Args:
        mode: 'boxes' (output has ``4 * anchors`` channels) or 'classes'
            (output has ``(1 + classes) * anchors`` channels).
        anchors: number of anchors.
        classes: number of classes.
        depth: number of 3x3 convolutions in ``subnet_base``.
        base_activation: stored on the instance as ``base_activation``.
        output_activation: stored on the instance as ``output_activation``.
    """
    super(SubNet, self).__init__()
    self.anchors = anchors
    self.classes = classes
    self.depth = depth
    self.base_activation = base_activation
    self.output_activation = output_activation

    tower = [conv3x3(256, 256, padding=1) for _ in range(depth)]
    self.subnet_base = nn.ModuleList(tower)

    if mode == 'boxes':
        out_channels = 4 * self.anchors
        self.subnet_output = conv3x3(256, out_channels, padding=1)
    elif mode == 'classes':
        # One extra channel per anchor for confidence.
        out_channels = (1 + self.classes) * self.anchors
        self.subnet_output = conv3x3(256, out_channels, padding=1)

    # NOTE(review): the source lost its indentation; this call is assumed
    # to run for both modes rather than only inside the 'classes' branch.
    self._output_layer_init(self.subnet_output.bias.data)
def __init__(self):
    """Create four conv/batch-norm pairs and three linear/batch-norm pairs.

    Registers conv1..conv4 with bn1..bn4 (512 channels, 3x3, strides
    2, 1, 2, 1), then fc1..fc3 with bn5..bn7
    (25088 -> 1024 -> 512 -> 256).
    """
    super(GlobalFeatNet, self).__init__()

    # Convolutional part: convN is immediately followed by bnN.
    for index, stride in enumerate((2, 1, 2, 1), start=1):
        setattr(self, 'conv%d' % index,
                nn.Conv2d(512, 512, kernel_size=3, stride=stride, padding=1))
        setattr(self, 'bn%d' % index, nn.BatchNorm2d(512))

    # Fully connected part: fcN is followed by bn(N + 4).
    sizes = (25088, 1024, 512, 256)
    for index, (n_in, n_out) in enumerate(zip(sizes[:-1], sizes[1:]), start=1):
        setattr(self, 'fc%d' % index, nn.Linear(n_in, n_out))
        setattr(self, 'bn%d' % (index + 4), nn.BatchNorm1d(n_out))
def __init__(self):
    """Build the discriminator: four stride-2 4x4 convs and a final 4x4 conv.

    All convolutions are created without bias. Layers 2-4 are followed by
    batch norm; layers 1-4 have a LeakyReLU(0.2) registered after them.
    """
    super(Discriminator, self).__init__()
    base = 64
    halving = dict(kernel_size=4, stride=2, padding=1, bias=False)

    self.conv1 = nn.Conv2d(3, base, **halving)
    self.relu1 = nn.LeakyReLU(0.2, inplace=True)

    self.conv2 = nn.Conv2d(base, base * 2, **halving)
    self.bn2 = nn.BatchNorm2d(base * 2)
    self.relu2 = nn.LeakyReLU(0.2, inplace=True)

    self.conv3 = nn.Conv2d(base * 2, base * 4, **halving)
    self.bn3 = nn.BatchNorm2d(base * 4)
    self.relu3 = nn.LeakyReLU(0.2, inplace=True)

    self.conv4 = nn.Conv2d(base * 4, base * 8, **halving)
    self.bn4 = nn.BatchNorm2d(base * 8)
    self.relu4 = nn.LeakyReLU(0.2, inplace=True)

    # Final layer: single output channel, stride 1, no padding.
    self.conv5 = nn.Conv2d(base * 8, 1, kernel_size=4, stride=1, padding=0, bias=False)
def __init__(self, input_nc, output_nc, num_downs, ngf=64,
             norm_layer=nn.BatchNorm2d, use_dropout=False, gpu_ids=None):
    """Build a U-Net generator from nested ``UnetSkipConnectionBlock``s.

    The network is assembled from the innermost block outwards.

    Args:
        input_nc: number of input channels; must equal ``output_nc``.
        output_nc: number of output channels.
        num_downs: the loop adds ``num_downs - 5`` intermediate blocks of
            width ``ngf * 8``.
        ngf: base number of filters.
        norm_layer: normalisation layer class passed to every block except
            the innermost one.
        use_dropout: passed to the intermediate ``ngf * 8`` blocks.
        gpu_ids: list of GPU ids stored on the instance. Defaults to a new
            empty list per instance (previously one mutable default list
            was shared by every instance created without this argument).

    Raises:
        AssertionError: if ``input_nc != output_nc``.
    """
    super(UnetGenerator, self).__init__()
    self.gpu_ids = [] if gpu_ids is None else gpu_ids

    # currently support only input_nc == output_nc
    assert(input_nc == output_nc)

    # construct unet structure
    # NOTE(review): the innermost block is the only one that does not
    # receive norm_layer; left unchanged here -- confirm whether that is
    # intended.
    unet_block = UnetSkipConnectionBlock(ngf * 8, ngf * 8, innermost=True)
    for i in range(num_downs - 5):
        unet_block = UnetSkipConnectionBlock(ngf * 8, ngf * 8, unet_block, norm_layer=norm_layer, use_dropout=use_dropout)
    unet_block = UnetSkipConnectionBlock(ngf * 4, ngf * 8, unet_block, norm_layer=norm_layer)
    unet_block = UnetSkipConnectionBlock(ngf * 2, ngf * 4, unet_block, norm_layer=norm_layer)
    unet_block = UnetSkipConnectionBlock(ngf, ngf * 2, unet_block, norm_layer=norm_layer)
    unet_block = UnetSkipConnectionBlock(output_nc, ngf, unet_block, outermost=True, norm_layer=norm_layer)

    self.model = unet_block
def test_parameters(self):
    """Check that ``parameters()`` does not yield shared parameters twice.

    One Linear layer is registered twice inside a container that also owns
    a parameter, and that container is repeated four times in a Sequential.
    """
    def num_params(module):
        return len(list(module.parameters()))

    l = nn.Linear(10, 20)

    class Net(nn.Container):
        def __init__(self):
            # The same Linear instance is registered under two names.
            super(Net, self).__init__(l1=l, l2=l)
            self.param = Parameter(torch.Tensor(3, 5))

    n = Net()
    s = nn.Sequential(n, n, n, n)

    expectations = ((l, 2), (n, 3), (s, 3))
    for module, expected_count in expectations:
        self.assertEqual(num_params(module), expected_count)
def test_parallel_apply(self):
    """Check ``dp.parallel_apply`` against running each module directly.

    Two Linear layers and their inputs are placed on CUDA devices 0 and 1.
    """
    l1 = nn.Linear(10, 5).float().cuda(0)
    l2 = nn.Linear(10, 5).float().cuda(1)
    i1 = Variable(torch.randn(2, 10).float().cuda(0))
    i2 = Variable(torch.randn(2, 10).float().cuda(1))

    # Reference results computed module by module.
    expected1 = l1(i1).data
    expected2 = l2(i2).data

    outputs = dp.parallel_apply((l1, l2), (i1, i2))
    for out, expected in zip(outputs, (expected1, expected2)):
        self.assertEqual(out.data, expected)

    # Second scenario with an empty input on device 1; nothing further is
    # asserted on these values in the visible source.
    inputs = (i1, Variable(i2.data.new()))
    expected_outputs = (expected1, expected2.new())
def test_MaxUnpool2d_output_size(self):
    """Check which ``output_size`` values ``MaxUnpool2d`` accepts.

    For the 5x5 input, sizes with both sides in 4..6 must be accepted
    (given as a tuple or as a ``torch.LongStorage``); any other size in
    3..9 must raise ValueError.
    """
    m = nn.MaxPool2d(3, stride=2, return_indices=True)
    mu = nn.MaxUnpool2d(3, stride=2)

    big_t = torch.rand(1, 1, 6, 6)
    big_t[0][0][4][4] = 100
    output_big, indices_big = m(Variable(big_t))
    with self.assertRaises(RuntimeError):
        mu(output_big, indices_big)

    small_t = torch.rand(1, 1, 5, 5)
    for i in (0, 2):
        for j in (0, 2):
            small_t[:, :, i, j] = 100
    output_small, indices_small = m(Variable(small_t))

    for h in range(3, 10):
        for w in range(3, 10):
            acceptable = 4 <= h <= 6 and 4 <= w <= 6
            if not acceptable:
                self.assertRaises(ValueError, mu, output_small, indices_small, (h, w))
                continue
            # Exercise the different accepted spellings of the size.
            size = (h, w)
            if h == 5:
                size = torch.LongStorage(size)
            elif h == 6:
                size = torch.LongStorage((1, 1) + size)
            mu(output_small, indices_small, output_size=size)
models.py 文件源码
项目:Structured-Self-Attentive-Sentence-Embedding
作者: ExplorerFreda
项目源码
文件源码
阅读 24
收藏 0
点赞 0
评论 0
def __init__(self, config):
    """Build the embedding + bidirectional LSTM encoder.

    Args:
        config: mapping with keys 'dropout', 'ntoken', 'ninp', 'nhid',
            'nlayers', 'pooling', 'dictionary' and 'word-vector'.

    The embedding row of '<pad>' is zeroed. If the path in
    ``config['word-vector']`` exists, it is loaded with ``torch.load`` and
    indexed as (vocab, vectors, dimension); embedding rows of words found
    in that vocab are overwritten with the first ``ninp`` values of their
    vectors.
    """
    super(BiLSTM, self).__init__()
    self.drop = nn.Dropout(config['dropout'])
    self.encoder = nn.Embedding(config['ntoken'], config['ninp'])
    self.bilstm = nn.LSTM(config['ninp'], config['nhid'], config['nlayers'],
                          dropout=config['dropout'], bidirectional=True)
    self.nlayers = config['nlayers']
    self.nhid = config['nhid']
    self.pooling = config['pooling']
    self.dictionary = config['dictionary']
    # self.init_weights()

    word2idx = self.dictionary.word2idx
    self.encoder.weight.data[word2idx['<pad>']] = 0

    if os.path.exists(config['word-vector']):
        print('Loading word vectors from', config['word-vector'])
        loaded = torch.load(config['word-vector'])
        # The stored dimension must cover the embedding size in use.
        assert loaded[2] >= config['ninp']
        vocab = loaded[0]
        vectors = loaded[1]
        loaded_cnt = 0
        for word in word2idx:
            if word in vocab:
                real_id = word2idx[word]
                loaded_id = vocab[word]
                self.encoder.weight.data[real_id] = vectors[loaded_id][:config['ninp']]
                loaded_cnt += 1
        print('%d words from external word vectors loaded.' % loaded_cnt)
# note: init_range constrains the value of initial weights
models.py 文件源码
项目:Structured-Self-Attentive-Sentence-Embedding
作者: ExplorerFreda
项目源码
文件源码
阅读 21
收藏 0
点赞 0
评论 0
def __init__(self, config):
    """Build the BiLSTM encoder and the two bias-free attention projections.

    Args:
        config: mapping with keys 'dropout', 'nhid', 'attention-unit',
            'attention-hops' and 'dictionary' (also passed to ``BiLSTM``).
    """
    super(SelfAttentiveEncoder, self).__init__()
    self.bilstm = BiLSTM(config)
    self.drop = nn.Dropout(config['dropout'])

    # ws1 takes 2 * nhid inputs; ws2 produces one output per attention hop.
    lstm_width = config['nhid'] * 2
    attention_units = config['attention-unit']
    hops = config['attention-hops']
    self.ws1 = nn.Linear(lstm_width, attention_units, bias=False)
    self.ws2 = nn.Linear(attention_units, hops, bias=False)

    self.tanh = nn.Tanh()
    self.softmax = nn.Softmax()
    self.dictionary = config['dictionary']
    # self.init_weights()
    self.attention_hops = hops
models.py 文件源码
项目:Structured-Self-Attentive-Sentence-Embedding
作者: ExplorerFreda
项目源码
文件源码
阅读 23
收藏 0
点赞 0
评论 0
def __init__(self, config):
    """Build the sentence classifier on top of the configured encoder.

    Args:
        config: mapping with keys 'pooling' ('mean', 'max' or 'all'),
            'nhid', 'nfc', 'dropout', 'class-number', 'dictionary' and,
            for 'all', 'attention-hops'.

    Raises:
        Exception: if ``config['pooling']`` is not one of the three
            supported values.
    """
    super(Classifier, self).__init__()

    pooling = config['pooling']
    if pooling in ('mean', 'max'):
        self.encoder = BiLSTM(config)
        fc_inputs = config['nhid'] * 2
    elif pooling == 'all':
        self.encoder = SelfAttentiveEncoder(config)
        # One 2 * nhid vector per attention hop.
        fc_inputs = config['nhid'] * 2 * config['attention-hops']
    else:
        raise Exception('Error when initializing Classifier')
    self.fc = nn.Linear(fc_inputs, config['nfc'])

    self.drop = nn.Dropout(config['dropout'])
    self.tanh = nn.Tanh()
    self.pred = nn.Linear(config['nfc'], config['class-number'])
    self.dictionary = config['dictionary']
    # self.init_weights()
def assureRatio(img):
    """Ensure imgH <= imgW.

    ``img`` must have four dimensions. When its height exceeds its width
    it is bilinearly resized to (height, height); otherwise it is returned
    unchanged.
    """
    _, _, height, width = img.size()
    if height <= width:
        return img
    resize = nn.UpsamplingBilinear2d(size=(height, height), scale_factor=None)
    return resize(img)
def __init__(self, batch_size, num_tokens, embed_size, word_gru_hidden, bidirectional= True, init_range=0.1, use_lstm=False):
    """Build the word-level recurrent encoder with attention parameters.

    Args:
        batch_size: stored on the instance.
        num_tokens: vocabulary size of the embedding.
        embed_size: embedding dimension and RNN input size.
        word_gru_hidden: RNN hidden size per direction.
        bidirectional: the RNN is bidirectional when this equals True.
        init_range: the two attention weight matrices are initialised
            uniformly in ``[-init_range, init_range]``.
        use_lstm: use ``nn.LSTM`` instead of ``nn.GRU``.
    """
    super(AttentionWordRNN, self).__init__()

    self.batch_size = batch_size
    self.num_tokens = num_tokens
    self.embed_size = embed_size
    self.word_gru_hidden = word_gru_hidden
    self.bidirectional = bidirectional
    self.use_lstm = use_lstm

    self.lookup = nn.Embedding(num_tokens, embed_size)

    # Same test as before (`== True`), evaluated once.
    two_directions = bidirectional == True
    if two_directions and use_lstm:
        print("inside using LSTM")
    rnn_class = nn.LSTM if use_lstm else nn.GRU
    self.word_gru = rnn_class(embed_size, word_gru_hidden, bidirectional=two_directions)

    # Attention parameters are sized to the RNN output width.
    attn_size = (2 if two_directions else 1) * word_gru_hidden
    self.weight_W_word = nn.Parameter(torch.Tensor(attn_size, attn_size))
    self.bias_word = nn.Parameter(torch.Tensor(attn_size, 1))
    self.weight_proj_word = nn.Parameter(torch.Tensor(attn_size, 1))

    self.softmax_word = nn.Softmax()
    self.weight_W_word.data.uniform_(-init_range, init_range)
    self.weight_proj_word.data.uniform_(-init_range, init_range)
    # torch.Tensor(...) allocates without initialising, so the bias used to
    # hold arbitrary memory. Zeroing it does not touch the random stream.
    self.bias_word.data.zero_()
def __init__(self, batch_size, num_tokens, embed_size, word_gru_hidden, bidirectional= True, init_range=0.1, use_lstm=False):
    """Build the word-level recurrent encoder with attention parameters.

    Args:
        batch_size: stored on the instance.
        num_tokens: vocabulary size of the embedding.
        embed_size: embedding dimension and RNN input size.
        word_gru_hidden: RNN hidden size per direction.
        bidirectional: the RNN is bidirectional when this equals True.
        init_range: the two attention weight matrices are initialised
            uniformly in ``[-init_range, init_range]``.
        use_lstm: use ``nn.LSTM`` instead of ``nn.GRU``.
    """
    super(AttentionWordRNN, self).__init__()

    self.batch_size = batch_size
    self.num_tokens = num_tokens
    self.embed_size = embed_size
    self.word_gru_hidden = word_gru_hidden
    self.bidirectional = bidirectional
    self.use_lstm = use_lstm

    self.lookup = nn.Embedding(num_tokens, embed_size)

    # Same test as before (`== True`), evaluated once.
    two_directions = bidirectional == True
    if two_directions and use_lstm:
        print("inside using LSTM")
    rnn_class = nn.LSTM if use_lstm else nn.GRU
    self.word_gru = rnn_class(embed_size, word_gru_hidden, bidirectional=two_directions)

    # Attention parameters are sized to the RNN output width.
    attn_size = (2 if two_directions else 1) * word_gru_hidden
    self.weight_W_word = nn.Parameter(torch.Tensor(attn_size, attn_size))
    self.bias_word = nn.Parameter(torch.Tensor(attn_size, 1))
    self.weight_proj_word = nn.Parameter(torch.Tensor(attn_size, 1))

    self.softmax_word = nn.Softmax()
    self.weight_W_word.data.uniform_(-init_range, init_range)
    self.weight_proj_word.data.uniform_(-init_range, init_range)
    # torch.Tensor(...) allocates without initialising, so the bias used to
    # hold arbitrary memory. Zeroing it does not touch the random stream.
    self.bias_word.data.zero_()
def train_data(mini_batch, feature_batch, targets, word_attn_model, mix_softmax, optimizer, criterion, do_step=True, cuda=False, lstm=False):
    """Run one training step on a pair of sentence batches plus features.

    Args:
        mini_batch: indexable holding two tensors (indices 0 and 1); when
            ``cuda`` is true its entries are replaced in place by CUDA copies.
        feature_batch: extra features passed to ``mix_softmax``.
        targets: targets passed to ``criterion``.
        word_attn_model: encoder providing ``init_hidden()`` and returning
            three values when called.
        mix_softmax: classifier called with the encodings and features.
        optimizer: optimizer; must expose ``_var_list``.
        criterion: loss function.
        do_step: call ``optimizer.step()`` after the backward pass.
        cuda: move state, inputs, predictions and targets to the GPU.
        lstm: the hidden state has two parts that are moved separately.

    Returns:
        Tuple ``(loss.data[0], grad_norm)``.
    """
    state_word = word_attn_model.init_hidden()
    optimizer.zero_grad()

    if cuda:
        if lstm:
            for part in (0, 1):
                state_word[part] = state_word[part].cuda()
        else:
            state_word = state_word.cuda()
        for side in (0, 1):
            mini_batch[side] = mini_batch[side].cuda()
        feature_batch = feature_batch.cuda()

    # Encode both sentence batches in order, threading the hidden state
    # from the first call into the second.
    encodings = []
    for side in (0, 1):
        encoded, state_word, _ = word_attn_model(mini_batch[side].transpose(0,1), state_word)
        encodings.append(encoded)
    s = torch.cat((encodings[0], encodings[1]),0)

    y_pred = mix_softmax(s, feature_batch)
    if cuda:
        y_pred = y_pred.cuda()
        targets = targets.cuda()

    loss = criterion(y_pred, targets)
    loss.backward()
    if do_step:
        optimizer.step()

    # The threshold is 1e20, so this call mainly serves to obtain the norm.
    grad_norm = torch.nn.utils.clip_grad_norm(optimizer._var_list, 1.0 * 1e20)
    return loss.data[0], grad_norm