def __init__(self):
    super(Discriminator, self).__init__()
    self.main = nn.Sequential(
        nn.Conv2d(nc, ndf, 4, 2, 1, bias=False),
        nn.LeakyReLU(0.2, inplace=True),
        nn.Conv2d(ndf, ndf * 2, 4, 2, 1, bias=False),
        nn.BatchNorm2d(ndf * 2),
        nn.LeakyReLU(0.2, inplace=True),
        nn.Conv2d(ndf * 2, ndf * 4, 4, 2, 1, bias=False),
        nn.BatchNorm2d(ndf * 4),
        nn.LeakyReLU(0.2, inplace=True),
        nn.Conv2d(ndf * 4, ndf * 8, 4, 2, 1, bias=False),
        nn.BatchNorm2d(ndf * 8),
        nn.LeakyReLU(0.2, inplace=True),
        nn.Conv2d(ndf * 8, 1, 4, 1, 0, bias=False),
        nn.Sigmoid()
    )
    self.apply(weights_init)
    self.optimizer = optim.Adam(self.parameters(), lr=learning_rate, betas=(beta_1, beta_2))
    # Alternative: self.optimizer = optim.RMSprop(self.parameters(), lr=learning_rate, alpha=beta_2)
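The snippet above relies on module-level names (nc, ndf, learning_rate, beta_1, beta_2, weights_init) defined elsewhere in its source file; a minimal sketch of typical DCGAN-style values, given purely as an assumption:

# Assumed module-level settings (not shown in the snippet; common DCGAN defaults, not confirmed by the source).
nc = 3                      # input image channels
ndf = 64                    # base number of discriminator feature maps
learning_rate = 2e-4
beta_1, beta_2 = 0.5, 0.999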
Python examples of optim.Adam() usage (source code snippets)
def __init__(self):
    super(Discriminator, self).__init__()
    self.conv0 = nn.Conv1d(nc, ndf, 4, 2, 1, bias=False)
    self.conv1 = nn.Conv1d(ndf, ndf * 2, 4, 2, 1, bias=False)
    self.conv2 = nn.Conv1d(ndf * 2, ndf * 4, 4, 2, 1, bias=False)
    self.conv3 = nn.Conv1d(ndf * 4, ndf * 8, 4, 2, 1, bias=False)
    self.fc0_size = 512 * 128
    self.fc0 = nn.Linear(self.fc0_size, 100)
    self.relu = nn.LeakyReLU(0.2, inplace=True)
    self.bn1 = nn.BatchNorm1d(ndf * 2)
    self.bn2 = nn.BatchNorm1d(ndf * 4)
    self.bn3 = nn.BatchNorm1d(ndf * 8)
    self.sigmoid = nn.Sigmoid()
    self.apply(weights_init)
    self.optimizer = optim.Adam(self.parameters(), lr=learning_rate, betas=(beta_1, beta_2))
    # Alternative: self.optimizer = optim.RMSprop(self.parameters(), lr=learning_rate, alpha=beta_2)
def train(self, lr, iters, batch_size=256):
    optimizer = optim.Adam(self.parameters(), lr=lr)
    t = trange(iters)
    for i in t:
        optimizer.zero_grad()
        inds = torch.floor(torch.rand(batch_size) * self.M).long().cuda()
        # guard: torch.rand() can occasionally return 1.0, pushing floor(rand * M) to M
        inds[inds >= self.M] = self.M - 1
        inds = Variable(inds)
        loss = self.forward(inds)
        # loss.data[0] is PyTorch 0.3-era indexing; newer versions would use loss.item()
        t.set_description(str(loss.data[0]))
        loss.backward()
        optimizer.step()
    return self.state_model, self.goal_model
def train(self, lr, iters):
    optimizer = optim.Adam(self.parameters(), lr=lr)
    t = trange(iters)
    for i in t:
        optimizer.zero_grad()
        loss = self.forward(())
        t.set_description('%.3f | %.3f | %.3f | %.3f' % (self.mse, self.divergence, self.world_mse, self.location_mse))
        loss.backward()
        optimizer.step()
    U, V = self.__lookup()
    recon = torch.mm(U, V.t())
    # print(U, V, recon)
    U = U.data.cpu().numpy()
    V = V.data.cpu().numpy()
    recon = recon.data.cpu().numpy()
    return U, V, recon
utils.py — project: restricted-boltzmann-machine-deep-belief-network-deep-boltzmann-machine-in-pytorch, author: wmingwei
def generative_fine_tune(dbn, lr=1e-2, epoch=100, batch_size=50, input_data=None, CD_k=1, optimization_method="Adam", momentum=0, weight_decay=0, test_input=None):
    if optimization_method == "RMSprop":
        optimizer = optim.RMSprop(dbn.parameters(), lr=lr, momentum=momentum, weight_decay=weight_decay)
    elif optimization_method == "SGD":
        optimizer = optim.SGD(dbn.parameters(), lr=lr, momentum=momentum, weight_decay=weight_decay)
    elif optimization_method == "Adam":
        optimizer = optim.Adam(dbn.parameters(), lr=lr, weight_decay=weight_decay)
    # touch every parameter once so gradient buffers are allocated
    for i in dbn.parameters():
        i.mean().backward()
    train_set = torch.utils.data.dataset.TensorDataset(input_data, torch.zeros(input_data.size()[0]))
    train_loader = torch.utils.data.DataLoader(train_set, batch_size=batch_size, shuffle=True)
    for i in range(epoch):
        for batch_idx, (data, target) in enumerate(train_loader):
            sleep_wake(dbn=dbn, optimizer=optimizer, lr=lr, CD_k=CD_k, v=data, batch_size=batch_size)
        if test_input is not None:
            print("fine tune", i, ais_dbn.logp_ais(dbn, test_input, step=1000, M_Z=20, M_IS=100, parallel=True))
utils.py — project: restricted-boltzmann-machine-deep-belief-network-deep-boltzmann-machine-in-pytorch, author: wmingwei
def joint_train(dbm, lr=1e-3, epoch=100, batch_size=50, input_data=None, weight_decay=0, k_positive=10, k_negative=10, alpha=[1e-1, 1e-1, 1]):
    u1 = nn.Parameter(torch.zeros(1))
    u2 = nn.Parameter(torch.zeros(1))
    # optimizer = optim.Adam(dbm.parameters(), lr=lr, weight_decay=weight_decay)
    optimizer = optim.SGD(dbm.parameters(), lr=lr, momentum=0.5)
    train_set = torch.utils.data.dataset.TensorDataset(input_data, torch.zeros(input_data.size()[0]))
    train_loader = torch.utils.data.DataLoader(train_set, batch_size=batch_size, shuffle=True)
    optimizer_u = optim.Adam([u1, u2], lr=lr / 1000, weight_decay=weight_decay)
    for _ in range(epoch):
        print("training epoch %i with u1 = %.4f, u2 = %.4f" % (_, u1.data.numpy()[0], u2.data.numpy()[0]))
        for batch_idx, (data, target) in enumerate(train_loader):
            data = Variable(data)
            positive_phase, negative_phase = dbm(v_input=data, k_positive=k_positive, k_negative=k_negative, greedy=False)
            loss = (energy(dbm=dbm, layer=positive_phase) - energy(dbm=dbm, layer=negative_phase)
                    + alpha[0] * torch.norm(torch.norm(dbm.W[0], 2, 1) - u1.repeat(dbm.W[0].size()[0], 1)) ** 2
                    + alpha[1] * torch.norm(torch.norm(dbm.W[1], 2, 1) - u2.repeat(dbm.W[1].size()[0], 1)) ** 2
                    + alpha[2] * (u1 - u2) ** 2)
            loss.backward()
            optimizer.step()
            optimizer.zero_grad()
            optimizer_u.step()
            optimizer_u.zero_grad()
utils.py — project: restricted-boltzmann-machine-deep-belief-network-deep-boltzmann-machine-in-pytorch, author: wmingwei
def train(rbm, lr=1e-3, epoch=100, batch_size=50, input_data=None, weight_decay=0, L1_penalty=0, test_set=None, CD_k=10):
    train_set = torch.utils.data.dataset.TensorDataset(input_data, torch.zeros(input_data.size()[0]))
    train_loader = torch.utils.data.DataLoader(train_set, batch_size=batch_size, shuffle=True)
    optimizer = optim.Adam(rbm.parameters(), lr=lr, weight_decay=weight_decay)
    for i in range(epoch):
        for batch_idx, (data, target) in enumerate(train_loader):
            input_data = Variable(data)
            v, v_ = rbm(input_data, CD_k=CD_k)
            # contrastive-divergence surrogate: lower the free energy of data relative to model samples
            loss = rbm.free_energy(v) - rbm.free_energy(v_.detach())
            loss.backward()
            optimizer.step()
            optimizer.zero_grad()
        if test_set is not None:
            print("epoch %i: " % i, reconstruct_error(rbm, Variable(test_set)))
def get_opt(name):
    opts = {
        'SGD': optim.SGD,
        'Adam': optim.Adam,
        'Adagrad': optim.Adagrad,
        'RMSprop': optim.RMSprop,
    }
    return opts[name]
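A usage sketch for the lookup above; `model` and the learning rate are placeholders, not names from the source:

# Illustrative use of get_opt (assumes `model` is an existing nn.Module).
optimizer_cls = get_opt('Adam')
optimizer = optimizer_cls(model.parameters(), lr=1e-3)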
def train_epochs(model, loss_fn, init_lr, model_dir):
    if os.path.exists(model_dir):
        shutil.rmtree(model_dir)
    os.makedirs(model_dir)
    optimizer = optim.Adam(model.parameters(), lr=init_lr)  # set up the optimizer
    learning_rate = init_lr
    max_iter = 5
    start_halfing_iter = 2
    halfing_factor = 0.1
    count = 0
    half_flag = False
    while count < max_iter:
        count += 1
        if count >= start_halfing_iter:
            half_flag = True
        print("Starting epoch", count)
        if half_flag:
            learning_rate *= halfing_factor
            adjust_learning_rate(optimizer, halfing_factor)  # decay learning rate
        model_path = model_dir + '/epoch' + str(count) + '_lr' + str(learning_rate) + '.pkl'
        train_one_epoch(model, loss_fn, optimizer)  # train one epoch
        torch.save(model.state_dict(), model_path)
    print("End training")
def __init__(self, train, valid, test, config):
    # fix seed
    self.seed = config['seed']
    np.random.seed(self.seed)
    torch.manual_seed(self.seed)
    torch.cuda.manual_seed(self.seed)
    self.train = train
    self.valid = valid
    self.test = test
    self.imgdim = len(train['imgfeat'][0])
    self.sentdim = len(train['sentfeat'][0])
    self.projdim = config['projdim']
    self.margin = config['margin']
    self.batch_size = 128
    self.ncontrast = 30
    self.maxepoch = 20
    self.early_stop = True
    config_model = {'imgdim': self.imgdim, 'sentdim': self.sentdim,
                    'projdim': self.projdim}
    self.model = COCOProjNet(config_model).cuda()
    self.loss_fn = PairwiseRankingLoss(margin=self.margin).cuda()
    self.optimizer = optim.Adam(self.model.parameters())
def __init__(self, train, valid, test, devscores, config):
    # fix seed
    np.random.seed(config['seed'])
    torch.manual_seed(config['seed'])
    assert torch.cuda.is_available(), 'torch.cuda required for Relatedness'
    torch.cuda.manual_seed(config['seed'])
    self.train = train
    self.valid = valid
    self.test = test
    self.devscores = devscores
    self.inputdim = train['X'].shape[1]
    self.nclasses = config['nclasses']
    self.seed = config['seed']
    self.l2reg = 0.
    self.batch_size = 64
    self.maxepoch = 1000
    self.early_stop = True
    self.model = nn.Sequential(
        nn.Linear(self.inputdim, self.nclasses),
        nn.Softmax(),
    )
    self.loss_fn = nn.MSELoss()
    if torch.cuda.is_available():
        self.model = self.model.cuda()
        self.loss_fn = self.loss_fn.cuda()
    self.loss_fn.size_average = False
    self.optimizer = optim.Adam(self.model.parameters(),
                                weight_decay=self.l2reg)
def get_optimizer(encoder, decoder, step=None, state=None, lr=0.0001):
    encoder_optimizer = optim.Adam(encoder.parameters(), lr=lr)
    decoder_optimizer = optim.Adam(decoder.parameters(), lr=lr)
    if not state:
        state = load_state(step)
    if state:
        encoder_optimizer.load_state_dict(state['encoder_optim'])
        decoder_optimizer.load_state_dict(state['decoder_optim'])
    return encoder_optimizer, decoder_optimizer
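load_state is not shown here; a sketch of how the matching checkpoint could be written, assuming load_state reads a dict with 'encoder_optim' and 'decoder_optim' entries (the file name is a placeholder):

# Counterpart save sketch (assumption: load_state() reads a dict shaped like this).
state = {
    'encoder_optim': encoder_optimizer.state_dict(),
    'decoder_optim': decoder_optimizer.state_dict(),
}
torch.save(state, 'checkpoint_step{}.pt'.format(step))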
def __init__(self):
    if use_cuda:
        self.encoder = EncoderRNN().cuda()
        self.decoder = DecoderRNN().cuda()
    else:
        self.encoder = EncoderRNN()
        self.decoder = DecoderRNN()
    self.encoder_optimizer = optim.Adam(self.encoder.parameters(), hp.lr)
    self.decoder_optimizer = optim.Adam(self.decoder.parameters(), hp.lr)
    self.eta_step = hp.eta_min
def train(self):
    optimizer = O.Adam([p for p in self.model.parameters()
                        if p.requires_grad])
    step = 0
    t = tqdm.tqdm()
    for epoch in range(self.n_epochs):
        for data in self.data_generator:
            step += 1
            optimizer.zero_grad()
            if step % self.val_period == 0:
                loss_b, loss_s = self.step_val(step, data)
            else:
                loss_b, loss_s = self.step_train(step, data)
            loss_b.backward()
            clip_grad_norm(self.model.parameters(), 10)
            optimizer.step()
            loss_val = loss_s.data[0]
            if step % self.save_period == 0:
                filename = self.ckpt_format.format(
                    epoch="{:02d}".format(epoch),
                    step="{:07d}".format(step),
                    loss="{:.4f}".format(loss_val)
                )
                self.save(filename)
            t.set_description("[{}|{}]: loss={:.4f}".format(
                epoch, step, loss_val
            ))
            t.update()
def build_model(self):
    """Build generator and discriminator."""
    self.generator = Generator(z_dim=self.z_dim,
                               image_size=self.image_size,
                               conv_dim=self.g_conv_dim)
    self.discriminator = Discriminator(image_size=self.image_size,
                                       conv_dim=self.d_conv_dim)
    self.g_optimizer = optim.Adam(self.generator.parameters(),
                                  self.lr, [self.beta1, self.beta2])
    self.d_optimizer = optim.Adam(self.discriminator.parameters(),
                                  self.lr, [self.beta1, self.beta2])
    if torch.cuda.is_available():
        self.generator.cuda()
        self.discriminator.cuda()
def _makeOptimizer(self):
    if self.method == 'sgd':
        self.optimizer = optim.SGD(self.params, lr=self.lr)
    elif self.method == 'adagrad':
        self.optimizer = optim.Adagrad(self.params, lr=self.lr)
    elif self.method == 'adadelta':
        self.optimizer = optim.Adadelta(self.params, lr=self.lr)
    elif self.method == 'adam':
        self.optimizer = optim.Adam(self.params, lr=self.lr)
    else:
        raise RuntimeError("Invalid optim method: " + self.method)
def get_optimizer(net, name="Adam"):
    """Get optimizer by name."""
    if name == "Adam":
        return optim.Adam(net.parameters(),
                          lr=params.learning_rate,
                          betas=(params.beta1, params.beta2))
def choose_optimizer(args, model):
    if args.optim == 'adam':
        return optim.Adam(filter(lambda p: p.requires_grad, model.parameters()), lr=args.lr, weight_decay=args.wd)
    elif args.optim == 'adagrad':
        # optimizer = optim.Adagrad(filter(lambda p: p.requires_grad, model.parameters()), lr=args.lr, weight_decay=args.wd)
        return optim.Adagrad([
            {'params': model.parameters(), 'lr': args.lr}
        ], lr=args.lr, weight_decay=args.wd)
def __init__(self, args):
    super(RN, self).__init__(args, 'RN')
    self.conv = ConvInputModel()
    # (number of filters per object + coordinates of object) * 2 + question vector
    self.g_fc1 = nn.Linear((24 + 2) * 2 + 11, 256)
    self.g_fc2 = nn.Linear(256, 256)
    self.g_fc3 = nn.Linear(256, 256)
    self.g_fc4 = nn.Linear(256, 256)
    self.f_fc1 = nn.Linear(256, 256)
    self.coord_oi = torch.FloatTensor(args.batch_size, 2)
    self.coord_oj = torch.FloatTensor(args.batch_size, 2)
    if args.cuda:
        self.coord_oi = self.coord_oi.cuda()
        self.coord_oj = self.coord_oj.cuda()
    self.coord_oi = Variable(self.coord_oi)
    self.coord_oj = Variable(self.coord_oj)

    # prepare coord tensor
    def cvt_coord(i):
        # integer division maps cell index i to its (row, col) position on the 5x5 grid
        return [(i // 5 - 2) / 2., (i % 5 - 2) / 2.]

    self.coord_tensor = torch.FloatTensor(args.batch_size, 25, 2)
    if args.cuda:
        self.coord_tensor = self.coord_tensor.cuda()
    self.coord_tensor = Variable(self.coord_tensor)
    np_coord_tensor = np.zeros((args.batch_size, 25, 2))
    for i in range(25):
        np_coord_tensor[:, i, :] = np.array(cvt_coord(i))
    self.coord_tensor.data.copy_(torch.from_numpy(np_coord_tensor))
    self.fcout = FCOutputModel()
    self.optimizer = optim.Adam(self.parameters(), lr=args.lr)
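For illustration, the grid that cvt_coord produces for the 5x5 feature map (a standalone reconstruction, not code from the source):

# Each of the 25 object slots gets a normalized (row, col) coordinate in [-1, 1].
for i in range(25):
    print(i, [(i // 5 - 2) / 2., (i % 5 - 2) / 2.])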
def __init__(self, args):
    super(CNN_MLP, self).__init__(args, 'CNNMLP')
    self.conv = ConvInputModel()
    self.fc1 = nn.Linear(5 * 5 * 24 + 11, 256)  # question vector concatenated to all conv features
    self.fcout = FCOutputModel()
    self.optimizer = optim.Adam(self.parameters(), lr=args.lr)
    # print([a for a in self.parameters()])