def __call__(self, trainer):
    """Trainer extension: log per-epoch stats, then sample text from the RNN.

    Saves the predictor's recurrent state before generation and restores it
    afterwards so that sampling does not disturb training.

    Args:
        trainer: the ``chainer.training.Trainer`` invoking this extension;
            its ``observation`` must contain ``"main/loss"``.
    """
    duration_epoch = time.time() - self.time_epoch
    epoch = trainer.updater.epoch
    loss = trainer.observation["main/loss"].data
    logger.info("epoch: %s, duration: %ds, loss: %.6g.",
                epoch, duration_epoch, loss)
    # get rnn state -- snapshot hidden state so generation can be undone
    model = trainer.updater.get_optimizer("main").target
    state = model.predictor.get_state()
    # generate text from a seed derived from self.text
    seed = generate_seed(self.text)
    generate_text(model, seed)
    # set rnn back to training state
    model.predictor.set_state(state)
    # reset time for the next epoch's duration measurement
    self.time_epoch = time.time()
# Collected example usages of the Python `training` module (aggregated snippets).
def main():
    """Train a CNN classifier on MNIST for 5 epochs and report metrics."""
    classifier = L.Classifier(CNN())
    opt = chainer.optimizers.Adam()
    opt.setup(classifier)
    train_set, test_set = chainer.datasets.get_mnist(ndim=3)
    train_it = chainer.iterators.SerialIterator(train_set, batch_size=100)
    test_it = chainer.iterators.SerialIterator(
        test_set, batch_size=100, repeat=False, shuffle=False)
    updater = training.StandardUpdater(train_it, opt)
    trainer = training.Trainer(updater, (5, 'epoch'), out='result')
    trainer.extend(extensions.Evaluator(test_it, classifier))
    trainer.extend(extensions.LogReport())
    report_keys = ['epoch', 'main/loss', 'validation/main/loss',
                   'main/accuracy', 'validation/main/accuracy']
    trainer.extend(extensions.PrintReport(report_keys))
    trainer.extend(extensions.ProgressBar())
    trainer.run()
def gan_sampling(gen, eval_folder, gpu, rows=6, cols=6, latent_len=128):
    """Build a trainer extension that saves a grid of GAN samples.

    The returned extension draws ``rows * cols`` latent vectors, runs the
    generator in test mode, and writes the images as one grid JPEG named
    after the current iteration under ``eval_folder``.
    """
    @chainer.training.make_extension()
    def samples_generation(trainer):
        # Create the output directory lazily, on first invocation.
        if not os.path.exists(eval_folder):
            os.makedirs(eval_folder)
        latent = np.random.normal(size=(rows * cols, latent_len)).astype("f")
        if gpu >= 0:
            latent = cuda.to_gpu(latent)
        # volatile=True: legacy Chainer flag that skips graph construction.
        latent = Variable(latent, volatile=True)
        images = gen(latent, test=True)
        out_path = eval_folder + "/iter_" + str(trainer.updater.iteration) + ".jpg"
        save_images_grid(images, path=out_path, grid_w=rows, grid_h=cols)

    return samples_generation
def gan_sampling_tags(gen, eval_folder, gpu, rows=6, cols=6, latent_len=128, attr_len=38, threshold=0.25):
    """Build a trainer extension that samples a tag-conditioned GAN.

    Random attribute (tag) vectors are concatenated with latent noise and
    fed to the generator; the resulting images are written as a grid JPEG
    named after the current iteration under ``eval_folder``.
    """
    def get_fake_tag():
        # Random attribute vector in {-1, +1}: one-hot over the first 13
        # slots, one-hot over slots 27+, thresholded values in between.
        prob2 = np.random.rand(attr_len)
        tags = np.zeros((attr_len)).astype("f")
        tags[:] = -1.0
        tags[np.argmax(prob2[0:13])] = 1.0
        tags[27 + np.argmax(prob2[27:])] = 1.0
        prob2[prob2 < threshold] = -1.0
        prob2[prob2 >= threshold] = 1.0
        for i in range(13, 27):
            tags[i] = prob2[i]
        return tags

    def get_fake_tag_batch():
        xp = gen.xp
        batch = rows * cols
        tags = xp.zeros((batch, attr_len)).astype("f")
        for i in range(batch):
            tags[i] = xp.asarray(get_fake_tag())
        return tags

    # BUG FIX: the make_extension decorator belongs on the extension
    # callable that is returned (samples_generation), not on the
    # get_fake_tag helper as in the original (compare gan_sampling above).
    @chainer.training.make_extension()
    def samples_generation(trainer):
        if not os.path.exists(eval_folder):
            os.makedirs(eval_folder)
        z = np.random.normal(size=(rows * cols, latent_len)).astype("f")
        if gpu >= 0:
            z = cuda.to_gpu(z)
        tags = get_fake_tag_batch()
        # volatile=True: legacy Chainer flag that skips graph construction.
        z = Variable(z, volatile=True)
        tags = Variable(tags, volatile=True)
        imgs = gen(F.concat([z, tags]), test=True)
        save_images_grid(imgs, path=eval_folder+"/iter_"+str(trainer.updater.iteration)+".jpg",
                         grid_w=rows, grid_h=cols)

    return samples_generation
def ae_reconstruction(enc, dec, eval_folder, gpu, data_iter, batch_size=32, img_chan=3, img_size=64):
    """Build a trainer extension that saves autoencoder reconstructions.

    Each invocation pulls one batch from ``data_iter``, encodes/decodes it
    in test mode, and writes both the reconstructions and the originals as
    image grids under ``eval_folder``.

    NOTE(review): the ``gpu`` parameter is unused here; device placement is
    inferred from ``enc.xp`` — confirm this is intentional.
    """
    @chainer.training.make_extension()
    def sample_reconstruction(trainer):
        xp = enc.xp
        batch = data_iter.next()
        # Pack the Python-level batch into one dense device array.
        d_real = xp.zeros((batch_size, img_chan, img_size, img_size)).astype("f")
        for i in range(batch_size):
            d_real[i, :] = xp.asarray(batch[i])
        # volatile=True: legacy Chainer flag that skips graph construction.
        x = Variable(d_real, volatile=True)
        imgs = dec(enc(x, test=True), test=True)
        save_images_grid(imgs, path=eval_folder+"/iter_"+str(trainer.updater.iteration)+".rec.jpg",
                         grid_w=batch_size//8, grid_h=8)
        save_images_grid(d_real, path=eval_folder+"/iter_"+str(trainer.updater.iteration)+".real.jpg",
                         grid_w=batch_size//8, grid_h=8)
    return sample_reconstruction
def main(gpu_id=-1, bs=32, epoch=20, out='./result', resume=''):
    """Train ShallowConv on MNIST while recording parameter statistics.

    Args:
        gpu_id: GPU device id; a negative value runs on CPU.
        bs: mini-batch size.
        epoch: number of training epochs.
        out: output directory for logs and results.
        resume: trainer snapshot path to resume from ('' starts fresh).
    """
    net = ShallowConv()
    model = L.Classifier(net)
    if gpu_id >= 0:
        # BUG FIX: get_device_from_id() only returns the Device object;
        # .use() is required to make it current before to_gpu().
        chainer.cuda.get_device_from_id(gpu_id).use()
        model.to_gpu()
    optimizer = chainer.optimizers.Adam()
    optimizer.setup(model)
    train, test = chainer.datasets.get_mnist(ndim=3)
    train_iter = chainer.iterators.SerialIterator(train, bs)
    test_iter = chainer.iterators.SerialIterator(
        test, bs, repeat=False, shuffle=False)
    updater = training.StandardUpdater(train_iter, optimizer, device=gpu_id)
    trainer = training.Trainer(updater, (epoch, 'epoch'), out=out)
    trainer.extend(extensions.ParameterStatistics(model.predictor))
    trainer.extend(extensions.Evaluator(test_iter, model, device=gpu_id))
    trainer.extend(extensions.LogReport(log_name='parameter_statistics'))
    trainer.extend(extensions.PrintReport(
        ['epoch', 'main/loss', 'validation/main/loss',
         'main/accuracy', 'validation/main/accuracy', 'elapsed_time']))
    trainer.extend(extensions.ProgressBar())
    if resume:
        chainer.serializers.load_npz(resume, trainer)
    trainer.run()
# Source file: not_layer_instance_norm_sample.py
# Project: instance_normalization_chainer (author: crcrpar)
def main(gpu_id=-1, bs=32, epoch=20, out='./not_layer_result', resume=''):
    """Train ShallowConv on MNIST (non-layer variant) with statistics.

    Args:
        gpu_id: GPU device id; a negative value runs on CPU.
        bs: mini-batch size.
        epoch: number of training epochs.
        out: output directory for logs and results.
        resume: trainer snapshot path to resume from ('' starts fresh).
    """
    net = ShallowConv()
    model = L.Classifier(net)
    if gpu_id >= 0:
        # BUG FIX: get_device_from_id() only returns the Device object;
        # .use() is required to make it current before to_gpu().
        chainer.cuda.get_device_from_id(gpu_id).use()
        model.to_gpu()
    optimizer = chainer.optimizers.Adam()
    optimizer.setup(model)
    train, test = chainer.datasets.get_mnist(ndim=3)
    train_iter = chainer.iterators.SerialIterator(train, bs)
    test_iter = chainer.iterators.SerialIterator(test, bs, repeat=False,
                                                 shuffle=False)
    updater = training.StandardUpdater(train_iter, optimizer, device=gpu_id)
    trainer = training.Trainer(updater, (epoch, 'epoch'), out=out)
    trainer.extend(extensions.ParameterStatistics(model.predictor))
    trainer.extend(extensions.Evaluator(test_iter, model, device=gpu_id))
    trainer.extend(extensions.LogReport())
    trainer.extend(extensions.PrintReport(
        ['epoch', 'main/loss', 'validation/main/loss',
         'main/accuracy', 'validation/main/accuracy', 'elapsed_time']))
    trainer.extend(extensions.ProgressBar())
    if resume:
        chainer.serializers.load_npz(resume, trainer)
    trainer.run()
def __init__(self, model_path, word_dim=None, afix_dim=None, nlayers=2,
             hidden_dim=128, elu_dim=64, dep_dim=100, dropout_ratio=0.5):
    """Initialize an LSTM CCG parser.

    Args:
        model_path: directory with model definition files (words.txt,
            suffixes.txt, prefixes.txt, target.txt, tagger_defs.txt).
        word_dim: word embedding size. ``None`` selects inference mode,
            loading hyperparameters from ``tagger_defs.txt`` instead.
        afix_dim: prefix/suffix embedding size (training mode only).
        nlayers: number of stacked LSTM layers.
        hidden_dim: LSTM hidden-state size.
        elu_dim: hidden size of the category classifier head.
        dep_dim: projection size for dependency/biaffine scoring.
        dropout_ratio: dropout used inside the NStepLSTMs.
    """
    self.model_path = model_path
    defs_file = model_path + "/tagger_defs.txt"
    if word_dim is None:
        # Inference mode: restore saved hyperparameters onto self.
        self.train = False
        Param.load(self, defs_file)
        self.extractor = FeatureExtractor(model_path)
    else:
        # training: collect hyperparameters and persist them for reload.
        self.train = True
        p = Param(self)
        p.dep_dim = dep_dim
        p.word_dim = word_dim
        p.afix_dim = afix_dim
        p.hidden_dim = hidden_dim
        p.elu_dim = elu_dim
        p.nlayers = nlayers
        p.n_words = len(read_model_defs(model_path + "/words.txt"))
        p.n_suffixes = len(read_model_defs(model_path + "/suffixes.txt"))
        p.n_prefixes = len(read_model_defs(model_path + "/prefixes.txt"))
        p.targets = read_model_defs(model_path + "/target.txt")
        p.dump(defs_file)
    # 8 affix features per token (see FeatureExtractor) join the word embedding.
    self.in_dim = self.word_dim + 8 * self.afix_dim
    self.dropout_ratio = dropout_ratio
    super(LSTMParser, self).__init__(
        emb_word=L.EmbedID(self.n_words, self.word_dim),
        emb_suf=L.EmbedID(self.n_suffixes, self.afix_dim, ignore_label=IGNORE),
        emb_prf=L.EmbedID(self.n_prefixes, self.afix_dim, ignore_label=IGNORE),
        lstm_f=L.NStepLSTM(nlayers, self.in_dim,
                           self.hidden_dim, self.dropout_ratio),
        lstm_b=L.NStepLSTM(nlayers, self.in_dim,
                           self.hidden_dim, self.dropout_ratio),
        linear_cat1=L.Linear(2 * self.hidden_dim, self.elu_dim),
        linear_cat2=L.Linear(self.elu_dim, len(self.targets)),
        linear_dep=L.Linear(2 * self.hidden_dim, self.dep_dim),
        linear_head=L.Linear(2 * self.hidden_dim, self.dep_dim),
        biaffine=Biaffine(self.dep_dim)
    )
def __init__(self, model_path, word_dim=None, afix_dim=None, nlayers=2,
             hidden_dim=128, dep_dim=100, dropout_ratio=0.5):
    """Initialize a biaffine LSTM parser with fixed-length LSTMs.

    Args:
        model_path: directory with model definition files.
        word_dim: word embedding size; ``None`` selects inference mode,
            loading hyperparameters from ``tagger_defs.txt`` instead.
        afix_dim: prefix/suffix embedding size (training mode only).
        nlayers: number of stacked LSTM layers.
        hidden_dim: LSTM hidden-state size.
        dep_dim: projection size for arc/relation biaffine scoring.
        dropout_ratio: stored on self; note the LSTMs below use a
            hard-coded 0.32 dropout instead.
    """
    self.model_path = model_path
    defs_file = model_path + "/tagger_defs.txt"
    if word_dim is None:
        # Inference mode: restore saved hyperparameters onto self.
        self.train = False
        Param.load(self, defs_file)
        self.extractor = FeatureExtractor(model_path, length=True)
    else:
        # training: collect hyperparameters and persist them for reload.
        self.train = True
        p = Param(self)
        p.dep_dim = dep_dim
        p.word_dim = word_dim
        p.afix_dim = afix_dim
        p.hidden_dim = hidden_dim
        p.nlayers = nlayers
        p.n_words = len(read_model_defs(model_path + "/words.txt"))
        p.n_suffixes = len(read_model_defs(model_path + "/suffixes.txt"))
        p.n_prefixes = len(read_model_defs(model_path + "/prefixes.txt"))
        p.targets = read_model_defs(model_path + "/target.txt")
        p.dump(defs_file)
    # 8 affix features per token join the word embedding.
    self.in_dim = self.word_dim + 8 * self.afix_dim
    self.dropout_ratio = dropout_ratio
    super(FastBiaffineLSTMParser, self).__init__(
        emb_word=L.EmbedID(self.n_words, self.word_dim, ignore_label=IGNORE),
        emb_suf=L.EmbedID(self.n_suffixes, self.afix_dim, ignore_label=IGNORE),
        emb_prf=L.EmbedID(self.n_prefixes, self.afix_dim, ignore_label=IGNORE),
        # NOTE(review): LSTM dropout is hard-coded to 0.32, not dropout_ratio.
        lstm_f=FixedLengthNStepLSTM(self.nlayers, self.in_dim, self.hidden_dim, 0.32),
        lstm_b=FixedLengthNStepLSTM(self.nlayers, self.in_dim, self.hidden_dim, 0.32),
        arc_dep=Linear(2 * self.hidden_dim, self.dep_dim),
        arc_head=Linear(2 * self.hidden_dim, self.dep_dim),
        rel_dep=Linear(2 * self.hidden_dim, self.dep_dim),
        rel_head=Linear(2 * self.hidden_dim, self.dep_dim),
        biaffine_arc=Biaffine(self.dep_dim),
        biaffine_tag=Bilinear(self.dep_dim, self.dep_dim, len(self.targets)))
def __init__(self, model_path, word_dim=None, char_dim=None, nlayers=2,
             hidden_dim=128, relu_dim=64, dep_dim=100, dropout_ratio=0.5):
    """Initialize a Japanese LSTM parser with peephole (Dyer) LSTM cells.

    Args:
        model_path: directory with model definition files.
        word_dim: word embedding size; ``None`` selects supertagger
            (inference) mode, loading hyperparameters from tagger_defs.txt.
        char_dim: character-convolution output size (training mode only).
        nlayers: stored hyperparameter; the network below uses two
            explicit forward and backward DyerLSTM layers.
        hidden_dim: LSTM hidden-state size.
        relu_dim: hidden size of the category classifier head.
        dep_dim: projection size for dependency/biaffine scoring.
        dropout_ratio: stored on self for use elsewhere in the model.
    """
    self.model_path = model_path
    defs_file = model_path + "/tagger_defs.txt"
    if word_dim is None:
        # use as supertagger
        self.train = False
        Param.load(self, defs_file)
        self.extractor = FeatureExtractor(model_path)
    else:
        # training: collect hyperparameters and persist them for reload.
        self.train = True
        p = Param(self)
        p.dep_dim = dep_dim
        p.word_dim = word_dim
        p.char_dim = char_dim
        p.hidden_dim = hidden_dim
        p.relu_dim = relu_dim
        p.nlayers = nlayers
        p.n_words = len(read_model_defs(model_path + "/words.txt"))
        p.n_chars = len(read_model_defs(model_path + "/chars.txt"))
        p.targets = read_model_defs(model_path + "/target.txt")
        p.dump(defs_file)
    # Word embedding plus pooled character-convolution features.
    self.in_dim = self.word_dim + self.char_dim
    self.dropout_ratio = dropout_ratio
    super(PeepHoleJaLSTMParser, self).__init__(
        emb_word=L.EmbedID(self.n_words, self.word_dim),
        # 50-dim raw char embeddings feed a width-3 convolution below.
        emb_char=L.EmbedID(self.n_chars, 50, ignore_label=IGNORE),
        conv_char=L.Convolution2D(1, self.char_dim,
                                  (3, 50), stride=1, pad=(1, 0)),
        lstm_f1=DyerLSTM(self.in_dim, self.hidden_dim),
        lstm_f2=DyerLSTM(self.hidden_dim, self.hidden_dim),
        lstm_b1=DyerLSTM(self.in_dim, self.hidden_dim),
        lstm_b2=DyerLSTM(self.hidden_dim, self.hidden_dim),
        linear_cat1=L.Linear(2 * self.hidden_dim, self.relu_dim),
        linear_cat2=L.Linear(self.relu_dim, len(self.targets)),
        linear_dep=L.Linear(2 * self.hidden_dim, self.dep_dim),
        linear_head=L.Linear(2 * self.hidden_dim, self.dep_dim),
        biaffine=Biaffine(self.dep_dim)
    )
def __init__(self, model_path, word_dim=None, afix_dim=None, nlayers=2,
             hidden_dim=128, elu_dim=64, dep_dim=100, dropout_ratio=0.5):
    """Initialize an LSTM CCG parser (duplicate of the variant above).

    Args:
        model_path: directory with model definition files.
        word_dim: word embedding size; ``None`` selects inference mode,
            loading hyperparameters from ``tagger_defs.txt`` instead.
        afix_dim: prefix/suffix embedding size (training mode only).
        nlayers: number of stacked LSTM layers.
        hidden_dim: LSTM hidden-state size.
        elu_dim: hidden size of the category classifier head.
        dep_dim: projection size for dependency/biaffine scoring.
        dropout_ratio: dropout used inside the NStepLSTMs.
    """
    self.model_path = model_path
    defs_file = model_path + "/tagger_defs.txt"
    if word_dim is None:
        # Inference mode: restore saved hyperparameters onto self.
        self.train = False
        Param.load(self, defs_file)
        self.extractor = FeatureExtractor(model_path)
    else:
        # training: collect hyperparameters and persist them for reload.
        self.train = True
        p = Param(self)
        p.dep_dim = dep_dim
        p.word_dim = word_dim
        p.afix_dim = afix_dim
        p.hidden_dim = hidden_dim
        p.elu_dim = elu_dim
        p.nlayers = nlayers
        p.n_words = len(read_model_defs(model_path + "/words.txt"))
        p.n_suffixes = len(read_model_defs(model_path + "/suffixes.txt"))
        p.n_prefixes = len(read_model_defs(model_path + "/prefixes.txt"))
        p.targets = read_model_defs(model_path + "/target.txt")
        p.dump(defs_file)
    # 8 affix features per token join the word embedding.
    self.in_dim = self.word_dim + 8 * self.afix_dim
    self.dropout_ratio = dropout_ratio
    super(LSTMParser, self).__init__(
        emb_word=L.EmbedID(self.n_words, self.word_dim),
        emb_suf=L.EmbedID(self.n_suffixes, self.afix_dim, ignore_label=IGNORE),
        emb_prf=L.EmbedID(self.n_prefixes, self.afix_dim, ignore_label=IGNORE),
        lstm_f=L.NStepLSTM(nlayers, self.in_dim,
                           self.hidden_dim, self.dropout_ratio),
        lstm_b=L.NStepLSTM(nlayers, self.in_dim,
                           self.hidden_dim, self.dropout_ratio),
        linear_cat1=L.Linear(2 * self.hidden_dim, self.elu_dim),
        linear_cat2=L.Linear(self.elu_dim, len(self.targets)),
        linear_dep=L.Linear(2 * self.hidden_dim, self.dep_dim),
        linear_head=L.Linear(2 * self.hidden_dim, self.dep_dim),
        biaffine=Biaffine(self.dep_dim)
    )
def __init__(self, model_path, word_dim=None, afix_dim=None, nlayers=2,
             hidden_dim=128, dep_dim=100, dropout_ratio=0.5):
    """Initialize a biaffine LSTM parser (standard NStepLSTM variant).

    Args:
        model_path: directory with model definition files.
        word_dim: word embedding size; ``None`` selects inference mode,
            loading hyperparameters from ``tagger_defs.txt`` instead.
        afix_dim: prefix/suffix embedding size (training mode only).
        nlayers: number of stacked LSTM layers.
        hidden_dim: LSTM hidden-state size.
        dep_dim: projection size for arc/relation biaffine scoring.
        dropout_ratio: stored on self; note the LSTMs below use a
            hard-coded 0.32 dropout instead.
    """
    self.model_path = model_path
    defs_file = model_path + "/tagger_defs.txt"
    if word_dim is None:
        # Inference mode: restore saved hyperparameters onto self.
        self.train = False
        Param.load(self, defs_file)
        self.extractor = FeatureExtractor(model_path)
    else:
        # training: collect hyperparameters and persist them for reload.
        self.train = True
        p = Param(self)
        p.dep_dim = dep_dim
        p.word_dim = word_dim
        p.afix_dim = afix_dim
        p.hidden_dim = hidden_dim
        p.nlayers = nlayers
        p.n_words = len(read_model_defs(model_path + "/words.txt"))
        p.n_suffixes = len(read_model_defs(model_path + "/suffixes.txt"))
        p.n_prefixes = len(read_model_defs(model_path + "/prefixes.txt"))
        p.targets = read_model_defs(model_path + "/target.txt")
        p.dump(defs_file)
    # 8 affix features per token join the word embedding.
    self.in_dim = self.word_dim + 8 * self.afix_dim
    self.dropout_ratio = dropout_ratio
    super(FastBiaffineLSTMParser, self).__init__(
        emb_word=L.EmbedID(self.n_words, self.word_dim, ignore_label=IGNORE),
        emb_suf=L.EmbedID(self.n_suffixes, self.afix_dim, ignore_label=IGNORE),
        emb_prf=L.EmbedID(self.n_prefixes, self.afix_dim, ignore_label=IGNORE),
        # NOTE(review): LSTM dropout is hard-coded to 0.32, not dropout_ratio.
        lstm_f=L.NStepLSTM(self.nlayers, self.in_dim, self.hidden_dim, 0.32),
        lstm_b=L.NStepLSTM(self.nlayers, self.in_dim, self.hidden_dim, 0.32),
        arc_dep=L.Linear(2 * self.hidden_dim, self.dep_dim),
        arc_head=L.Linear(2 * self.hidden_dim, self.dep_dim),
        rel_dep=L.Linear(2 * self.hidden_dim, self.dep_dim),
        rel_head=L.Linear(2 * self.hidden_dim, self.dep_dim),
        biaffine_arc=Biaffine(self.dep_dim),
        biaffine_tag=Bilinear(self.dep_dim, self.dep_dim, len(self.targets))
    )
def __init__(self, model_path, word_dim=None, caps_dim=None, suffix_dim=None):
    """Initialize a window-based embedding supertagger.

    Args:
        model_path: directory with model definition files.
        word_dim: word embedding size; ``None`` selects supertagger
            (inference) mode, reading sizes from ``tagger_defs.txt``.
        caps_dim: capitalization-feature embedding size (training mode).
        suffix_dim: suffix embedding size (training mode).
    """
    self.model_path = model_path
    if word_dim is None:
        # use as supertagger: restore embedding sizes from the defs file.
        with open(os.path.join(model_path, "tagger_defs.txt")) as defs_file:
            defs = json.load(defs_file)
        self.word_dim = defs["word_dim"]
        self.caps_dim = defs["caps_dim"]
        self.suffix_dim = defs["suffix_dim"]
    else:
        # training
        self.word_dim = word_dim
        self.caps_dim = caps_dim
        self.suffix_dim = suffix_dim
    self.words = read_model_defs(os.path.join(model_path, "words.txt"))
    self.suffixes = read_model_defs(os.path.join(model_path, "suffixes.txt"))
    self.caps = read_model_defs(os.path.join(model_path, "caps.txt"))
    self.targets = read_model_defs(os.path.join(model_path, "target.txt"))
    # self.unk_word = self.words["*UNKNOWN*"]
    self.unk_suffix = self.suffixes["UNK"]
    # 7-token context window, each token contributing all three embeddings.
    in_dim = 7 * (self.word_dim + self.caps_dim + self.suffix_dim)
    super(EmbeddingTagger, self).__init__(
        emb_word=L.EmbedID(len(self.words), self.word_dim),
        emb_caps=L.EmbedID(len(self.caps), self.caps_dim),
        emb_suffix=L.EmbedID(len(self.suffixes), self.suffix_dim),
        linear=L.Linear(in_dim, len(self.targets)),
    )
def create_traindata(self, outdir):
    """Build training data files from the Japanese CCGBank trees.

    Performs two passes over the corpus: the first collects frequency
    dictionaries, the second emits windowed training samples for
    categories above the frequency cutoff. Writes unary/seen rules,
    target/word/char vocabularies, the JSON training samples, and the
    raw sentences into ``outdir``.
    """
    trees = JaCCGReader(self.filepath).readall()
    # first construct dictionaries only
    for tree in trees:
        self._traverse(tree)
    # construct training samples with
    # categories whose frequency >= freq_cut.
    for tree in trees:
        tokens = get_leaves(tree)
        words = [token.word for token in tokens]
        self.sents.append(" ".join(words))
        cats = [token.cat.without_semantics for token in tokens]
        samples = get_context_by_window(
            words, CONTEXT, lpad=LPAD, rpad=RPAD)
        assert len(samples) == len(cats)
        for cat, sample in zip(cats, samples):
            if self.cats[cat] >= self.cat_freq_cut:
                self.samples[" ".join(sample)] = cat
    # Prune rare categories and words below their frequency cutoffs.
    self.cats = {k: v for (k, v) in self.cats.items() \
                 if v >= self.cat_freq_cut}
    self.words = {k: v for (k, v) in self.words.items() \
                  if v >= self.word_freq_cut}
    with open(outdir + "/unary_rules.txt", "w") as f:
        self._write(self.unary_rules, f, comment_out_value=True)
    with open(outdir + "/seen_rules.txt", "w") as f:
        self._write(self.seen_rules, f, comment_out_value=True)
    with open(outdir + "/target.txt", "w") as f:
        self._write(self.cats, f, comment_out_value=False)
    with open(outdir + "/words.txt", "w") as f:
        self._write(self.words, f, comment_out_value=False)
    with open(outdir + "/chars.txt", "w") as f:
        self._write(self.chars, f, comment_out_value=False)
    with open(outdir + "/traindata.json", "w") as f:
        json.dump(self.samples, f)
    with open(outdir + "/trainsents.txt", "w") as f:
        for sent in self.sents:
            # NOTE(review): encode(...) + "\n" assumes Python 2 str
            # semantics; this line fails on Python 3 — confirm runtime.
            f.write(sent.encode("utf-8") + "\n")
def __init__(self, model_path, word_dim=None, char_dim=None):
    """Initialize a Japanese CCG window-based embedding tagger.

    Args:
        model_path: directory with model definition files.
        word_dim: word embedding size; ``None`` selects supertagger
            (inference) mode, reading sizes from ``tagger_defs.txt``.
        char_dim: character embedding size (training mode only).
    """
    self.model_path = model_path
    defs_file = model_path + "/tagger_defs.txt"
    if word_dim is None:
        # use as supertagger: restore embedding sizes from the defs file.
        with open(defs_file) as f:
            defs = json.load(f)
        self.word_dim = defs["word_dim"]
        self.char_dim = defs["char_dim"]
    else:
        # training: record sizes and persist them for later inference.
        self.word_dim = word_dim
        self.char_dim = char_dim
        with open(defs_file, "w") as f:
            json.dump({"model": self.__class__.__name__,
                       "word_dim": self.word_dim,
                       "char_dim": self.char_dim}, f)
    self.extractor = FeatureExtractor(model_path)
    self.targets = read_model_defs(model_path + "/target.txt")
    self.train = True
    hidden_dim = 1000
    # Each window position contributes a word and a char embedding.
    in_dim = WINDOW_SIZE * (self.word_dim + self.char_dim)
    super(JaCCGEmbeddingTagger, self).__init__(
        emb_word=L.EmbedID(len(self.extractor.words), self.word_dim),
        emb_char=L.EmbedID(len(self.extractor.chars),
                           self.char_dim, ignore_label=IGNORE),
        linear1=L.Linear(in_dim, hidden_dim),
        linear2=L.Linear(hidden_dim, len(self.targets)),
    )
def __call__(self, trainer):
    """Decides whether the extension should be called on this iteration.
    Args:
        trainer (~chainer.training.Trainer): Trainer object that this
            trigger is associated with. The ``observation`` of this trainer
            is used to determine if the trigger should fire.
    Returns:
        bool: ``True`` if the corresponding extension should be invoked in
            this iteration.
    """
    observation = trainer.observation
    key = self._key
    # Accumulate the watched metric whenever it is reported.
    if key in observation:
        self._summary.add({key: observation[key]})
    # Only evaluate the best-value condition at interval boundaries.
    if not self._interval_trigger(trainer):
        return False
    mean = self._summary.compute_mean()
    current = float(mean[key])  # copy to CPU
    self._init_summary()
    improved = (self._best_value is None
                or self._compare(self._best_value, current))
    if improved:
        # New best value: remember it and do not fire.
        self._best_value = current
        return False
    return True
def get_example(self, i):
    """Return the preprocessed (image, label) pair at index ``i``.

    Preprocessing steps:
      - rectangular crop (random position when ``self.random``, else center)
      - random horizontal flip (random mode only)
      - mean subtraction followed by scaling to [0, 1]
    """
    crop = self.crop_size
    image, label = self.base[i]
    _, height, width = image.shape
    if self.random:
        # Randomly position the crop window and maybe mirror the image.
        top = random.randint(0, height - crop - 1)
        left = random.randint(0, width - crop - 1)
        if random.randint(0, 1):
            image = image[:, :, ::-1]
    else:
        # Deterministic center crop.
        top = (height - crop) // 2
        left = (width - crop) // 2
    bottom = top + crop
    right = left + crop
    image = image[:, top:bottom, left:right]
    # Subtract the matching region of the mean image, then scale.
    image -= self.mean[:, top:bottom, left:right]
    image *= (1.0 / 255.0)  # Scale to [0, 1]
    return image, label
# chainermn.create_multi_node_evaluator can be also used with user customized
# evaluator classes that inherit chainer.training.extensions.Evaluator.
def get_stats(self):
    """Return statistics about snapshot taking.

    During or after training, the checkpointer accumulates timing
    statistics for saved checkpoints (e.g. average, minimum and maximum
    time). These help identify slow nodes or disks, estimate the average
    time penalty of a snapshot, and tune the snapshot interval.
    """
    return self.stats.report()
def main():
    """Train an MLP classifier on MNIST with snapshots and logging."""
    unit = 1000
    batchsize = 100
    epoch = 20
    classifier = L.Classifier(MLP(unit, 10))
    opt = chainer.optimizers.Adam()
    opt.setup(classifier)
    train_data, test_data = chainer.datasets.get_mnist()
    train_it = chainer.iterators.SerialIterator(train_data, batchsize)
    test_it = chainer.iterators.SerialIterator(
        test_data, batchsize, repeat=False, shuffle=False)
    updater = training.StandardUpdater(train_it, opt)
    trainer = training.Trainer(updater, (epoch, 'epoch'), out='result')
    trainer.extend(extensions.Evaluator(test_it, classifier))
    trainer.extend(extensions.dump_graph('main/loss'))
    # Take a full trainer snapshot once, at the end of training.
    trainer.extend(extensions.snapshot(), trigger=(epoch, 'epoch'))
    trainer.extend(extensions.LogReport())
    report_keys = ['epoch', 'main/loss', 'validation/main/loss',
                   'main/accuracy', 'validation/main/accuracy', 'elapsed_time']
    trainer.extend(extensions.PrintReport(report_keys))
    trainer.extend(extensions.ProgressBar())
    trainer.run()
def fit(model, train, valid, device=-1, batchsize=4096, n_epoch=500,
        resume=None, alpha=1e-3):
    """Train ``model`` on ``train`` with validation, logging and snapshots.

    Args:
        model: Chainer link to optimize.
        train: training dataset.
        valid: validation dataset.
        device: GPU id (negative = CPU).
        batchsize: mini-batch size for both iterators.
        n_epoch: number of training epochs.
        resume: optional trainer snapshot path to resume from.
        alpha: Adam learning rate.
    """
    if device >= 0:
        chainer.cuda.get_device(device).use()
        model.to_gpu(device)
    optimizer = chainer.optimizers.Adam(alpha)
    optimizer.setup(model)
    # Setup iterators
    train_iter = chainer.iterators.SerialIterator(train, batchsize)
    valid_iter = chainer.iterators.SerialIterator(valid, batchsize,
                                                  repeat=False, shuffle=False)
    updater = training.StandardUpdater(train_iter, optimizer, device=device)
    trainer = training.Trainer(updater, (n_epoch, 'epoch'),
                               out='out_' + str(device))
    # Setup logging, printing & saving
    keys = ['loss', 'rmse', 'bias', 'kld0', 'kld1']
    keys += ['kldg', 'kldi', 'hypg', 'hypi']
    keys += ['hypglv', 'hypilv']
    reports = ['epoch']
    reports += ['main/' + key for key in keys]
    reports += ['validation/main/rmse']
    # NOTE(review): both TestModeEvaluator and the plain Evaluator are
    # registered on the same data; the second run overwrites the first's
    # validation reports — confirm this is intentional.
    trainer.extend(TestModeEvaluator(valid_iter, model, device=device))
    trainer.extend(extensions.Evaluator(valid_iter, model, device=device))
    trainer.extend(extensions.dump_graph('main/loss'))
    trainer.extend(extensions.snapshot(), trigger=(10, 'epoch'))
    trainer.extend(extensions.LogReport(trigger=(1, 'epoch')))
    trainer.extend(extensions.PrintReport(reports))
    trainer.extend(extensions.ProgressBar(update_interval=10))
    # If previous model detected, resume
    if resume:
        print("Loading from {}".format(resume))
        chainer.serializers.load_npz(resume, trainer)
    # Run the model
    trainer.run()
def pretrain_source_cnn(data, args, epochs=1000):
    """Pretrain the source-domain CNN classifier for ADDA.

    Args:
        data: dataset pair consumed by ``data2iterator``.
        args: namespace providing ``device``, ``batchsize`` and ``output``.
        epochs: number of training epochs.

    Returns:
        The trained source CNN (wrapped in its loss link).
    """
    print(":: pretraining source encoder")
    source_cnn = Loss(num_classes=10)
    if args.device >= 0:
        source_cnn.to_gpu()
    optimizer = chainer.optimizers.Adam()
    optimizer.setup(source_cnn)
    train_iterator, test_iterator = data2iterator(data, args.batchsize, multiprocess=False)
    # train_iterator = chainer.iterators.MultiprocessIterator(data, args.batchsize, n_processes=4)
    updater = chainer.training.StandardUpdater(iterator=train_iterator, optimizer=optimizer, device=args.device)
    trainer = chainer.training.Trainer(updater, (epochs, 'epoch') ,out=args.output)
    # learning rate decay
    # trainer.extend(extensions.ExponentialShift("alpha", rate=0.9, init=args.learning_rate, target=args.learning_rate*10E-5))
    trainer.extend(extensions.Evaluator(test_iterator, source_cnn, device=args.device))
    # trainer.extend(extensions.snapshot(filename='snapshot_epoch_{.updater.epoch}'), trigger=(10, "epoch"))
    # Save the model only once, at the final epoch.
    trainer.extend(extensions.snapshot_object(optimizer.target, "source_model_epoch_{.updater.epoch}"), trigger=(epochs, "epoch"))
    trainer.extend(extensions.ProgressBar(update_interval=10))
    trainer.extend(extensions.LogReport(trigger=(1, "epoch")))
    trainer.extend(extensions.PrintReport(
        ['epoch', 'main/loss', 'validation/main/loss',
         'main/accuracy', 'validation/main/accuracy', 'elapsed_time']))
    trainer.run()
    return source_cnn
def train_target_cnn(source, target, source_cnn, target_cnn, args, epochs=10000):
    """Adversarially train the target encoder against a discriminator (ADDA).

    Args:
        source: source-domain dataset pair.
        target: target-domain dataset pair.
        source_cnn: frozen, pretrained source classifier.
        target_cnn: classifier whose encoder is adapted to the target domain.
        args: namespace providing ``device``, ``lr``, ``weight_decay``,
            ``batchsize`` and ``output``.
        epochs: number of training epochs.
    """
    print(":: training encoder with target domain")
    discriminator = Discriminator()
    if args.device >= 0:
        source_cnn.to_gpu()
        target_cnn.to_gpu()
        discriminator.to_gpu()
    # target_optimizer = chainer.optimizers.Adam(alpha=1.0E-5, beta1=0.5)
    target_optimizer = chainer.optimizers.RMSprop(lr=args.lr)
    # target_optimizer = chainer.optimizers.MomentumSGD(lr=1.0E-4, momentum=0.99)
    # Only the encoder is updated; the classification head stays fixed.
    target_optimizer.setup(target_cnn.encoder)
    target_optimizer.add_hook(chainer.optimizer.WeightDecay(args.weight_decay))
    # discriminator_optimizer = chainer.optimizers.Adam(alpha=1.0E-5, beta1=0.5)
    discriminator_optimizer = chainer.optimizers.RMSprop(lr=args.lr)
    # discriminator_optimizer = chainer.optimizers.MomentumSGD(lr=1.0E-4, momentum=0.99)
    discriminator_optimizer.setup(discriminator)
    discriminator_optimizer.add_hook(chainer.optimizer.WeightDecay(args.weight_decay))
    source_train_iterator, source_test_iterator = data2iterator(source, args.batchsize, multiprocess=False)
    target_train_iterator, target_test_iterator = data2iterator(target, args.batchsize, multiprocess=False)
    updater = ADDAUpdater(source_train_iterator, target_train_iterator, source_cnn, target_optimizer, discriminator_optimizer, args)
    trainer = chainer.training.Trainer(updater, (epochs, 'epoch'), out=args.output)
    trainer.extend(extensions.Evaluator(target_test_iterator, target_cnn, device=args.device))
    # trainer.extend(extensions.snapshot(filename='snapshot_epoch_{.updater.epoch}'), trigger=(10, "epoch"))
    # Save the adapted model only once, at the final epoch.
    trainer.extend(extensions.snapshot_object(target_cnn, "target_model_epoch_{.updater.epoch}"), trigger=(epochs, "epoch"))
    trainer.extend(extensions.ProgressBar(update_interval=10))
    trainer.extend(extensions.LogReport(trigger=(1, "epoch")))
    trainer.extend(extensions.PrintReport(
        ["epoch", "loss/discrim", "loss/encoder",
         "validation/main/loss", "validation/main/accuracy", "elapsed_time"]))
    trainer.run()
def check_train(self, gpu):
    """Smoke-test graph-CNN training on a tiny fixed graph.

    Builds a 4-node adjacency matrix, trains GraphCNN for 10 epochs with a
    ParallelUpdater on the given GPU, and logs loss/accuracy. Results are
    written to a temporary directory.
    """
    outdir = tempfile.mkdtemp()
    print("outdir: {}".format(outdir))
    n_classes = 2
    batch_size = 32
    devices = {'main': gpu}
    # Fixed toy adjacency matrix defining the graph structure.
    A = np.array([
        [0, 1, 1, 0],
        [1, 0, 0, 1],
        [1, 0, 0, 0],
        [0, 1, 0, 0],
    ]).astype(np.float32)
    model = graph_cnn.GraphCNN(A, n_out=n_classes)
    optimizer = optimizers.Adam(alpha=1e-4)
    optimizer.setup(model)
    train_dataset = EasyDataset(train=True, n_classes=n_classes)
    train_iter = chainer.iterators.MultiprocessIterator(
        train_dataset, batch_size)
    updater = ParallelUpdater(train_iter, optimizer, devices=devices)
    trainer = chainer.training.Trainer(updater, (10, 'epoch'), out=outdir)
    trainer.extend(extensions.LogReport(trigger=(1, 'epoch')))
    trainer.extend(extensions.PrintReport(
        ['epoch', 'iteration', 'main/loss', 'main/accuracy']))
    trainer.extend(extensions.ProgressBar())
    trainer.run()
# Source file: 02-train.py
# Project: Semantic-Segmentation-using-Adversarial-Networks (author: oyam)
def parse_args(generators, discriminators, updaters):
    """Parse command-line options for adversarial semantic segmentation.

    The choice lists for ``--generator``, ``--discriminator`` and
    ``--updater`` are taken from the keys of the given registries.
    """
    parser = argparse.ArgumentParser(
        description='Semantic Segmentation using Adversarial Networks')
    add = parser.add_argument
    add('--generator', choices=generators.keys(), default='fcn32s',
        help='Generator(segmentor) architecture')
    add('--discriminator', choices=discriminators.keys(), default='largefov',
        help='Discriminator architecture')
    add('--updater', choices=updaters.keys(), default='gan',
        help='Updater')
    add('--initgen_path', default='pretrained_model/vgg16.npz',
        help='Pretrained model of generator')
    add('--initdis_path', default=None,
        help='Pretrained model of discriminator')
    add('--batchsize', '-b', type=int, default=1,
        help='Number of images in each mini-batch')
    add('--iteration', '-i', type=int, default=100000,
        help='Number of sweeps over the dataset to train')
    add('--gpu', '-g', type=int, default=-1,
        help='GPU ID (negative value indicates CPU)')
    add('--out', '-o', default='snapshot',
        help='Directory to output the result')
    add('--resume', '-r', default='',
        help='Resume the training from snapshot')
    add('--evaluate_interval', type=int, default=1000,
        help='Interval of evaluation')
    add('--snapshot_interval', type=int, default=10000,
        help='Interval of snapshot')
    add('--display_interval', type=int, default=10,
        help='Interval of displaying log to console')
    return parser.parse_args()
def fit(self, X, y=None, **kwargs):
    """If hyper parameters are set to None, then instance's variable is used,
    this functionality is used Grid search with `set_params` method.
    Also if instance's variable is not set, _default_hyperparam is used.
    Usage: model.fit(train_dataset) or model.fit(X, y)
    Args:
        train: training dataset, assumes chainer's dataset class
        test: test dataset for evaluation, assumes chainer's dataset class
        batchsize: batchsize for both training and evaluation
        iterator_class: iterator class used for this training,
            currently assumes SerialIterator or MultiProcessIterator
        optimizer: optimizer instance to update parameter
        epoch: training epoch
        out: directory path to save the result
        snapshot_frequency (int): snapshot frequency in epoch.
            Negative value indicates not to take snapshot.
        dump_graph: Save computational graph info or not, default is False.
        log_report: Enable LogReport or not
        plot_report: Enable PlotReport or not
        print_report: Enable PrintReport or not
        progress_report: Enable ProgressReport or not
        resume: specify trainer saved path to resume training.
    """
    # Keep only kwargs that fit_core actually accepts (sklearn-style
    # compatibility shim), then delegate the real work to fit_core.
    kwargs = self.filter_sk_params(self.fit_core, kwargs)
    return self.fit_core(X, y, **kwargs)
def train_task(args, train_name, model, epoch_num,
               train_dataset, test_dataset_dict, batch_size):
    """Train ``model`` on one task while evaluating on several test sets.

    Args:
        args: namespace providing the output directory ``out``.
        train_name: label used for the accuracy plot file name.
        model: Chainer link to optimize with plain SGD.
        epoch_num: number of training epochs.
        train_dataset: dataset for training.
        test_dataset_dict: mapping of evaluator name -> test dataset;
            each gets its own named Evaluator extension.
        batch_size: mini-batch size for all iterators.
    """
    optimizer = optimizers.SGD()
    optimizer.setup(model)
    train_iter = iterators.SerialIterator(train_dataset, batch_size)
    test_iter_dict = {name: iterators.SerialIterator(
        test_dataset, batch_size, repeat=False, shuffle=False)
        for name, test_dataset in test_dataset_dict.items()}
    updater = training.StandardUpdater(train_iter, optimizer)
    trainer = training.Trainer(updater, (epoch_num, 'epoch'), out=args.out)
    # One named evaluator per test set; reports go under "<name>/main/...".
    for name, test_iter in test_iter_dict.items():
        trainer.extend(extensions.Evaluator(test_iter, model), name)
    trainer.extend(extensions.LogReport())
    trainer.extend(extensions.PrintReport(
        ['epoch', 'main/loss'] +
        [test+'/main/loss' for test in test_dataset_dict.keys()] +
        ['main/accuracy'] +
        [test+'/main/accuracy' for test in test_dataset_dict.keys()]))
    trainer.extend(extensions.ProgressBar())
    trainer.extend(extensions.PlotReport(
        [test+"/main/accuracy" for test
         in test_dataset_dict.keys()],
        file_name=train_name+".png"))
    trainer.run()
def main():
    """Entry point for the EWC permuted-MNIST experiment.

    Parses options, builds the EWC-wrapped MLP, creates the permuted
    second task, and trains sequentially twice: once without and once
    with the EWC penalty enabled.
    """
    output_dim = 10
    parser = argparse.ArgumentParser(description='EWC MNIST')
    parser.add_argument('--batchsize', '-b', type=int, default=100,
                        help='Number of images in each mini-batch')
    parser.add_argument('--epoch', '-e', type=int, default=800,
                        help='Number of sweeps over the dataset to train')
    """
    parser.add_argument('--gpu', '-g', type=int, default=-1,
                        help='GPU ID (negative value indicates CPU)')
    """
    parser.add_argument('--out', '-o', default='result',
                        help='Directory to output the result')
    parser.add_argument('--lam', '-l', type=float, default=15.,
                        help='lambda parameter for EWC loss')
    parser.add_argument('--num_samples', '-n', type=int, default=200,
                        help='number of samples to compute fisher')
    parser.add_argument('--hidden', '-hi', type=int, default=50,
                        help='number of hidden node')
    parser.add_argument('--skip_taskA', '-s', type=bool, default=False,
                        help='whether skip training taskA or not')
    args = parser.parse_args()
    model = EWC(MLP(args.hidden, output_dim), args.lam, args.num_samples)
    train, test = chainer.datasets.get_mnist()
    # Task B: the same MNIST data with a fixed pixel permutation applied.
    train2, test2 = permutate_mnist([train, test])
    print("Train without EWC")
    train_tasks_continuosly(args, model, train, test, train2, test2,
                            enable_ewc=False)
    print("Train with EWC")
    train_tasks_continuosly(args, model, train, test, train2, test2,
                            enable_ewc=True)
def create_updater(train_iter, optimizer, devices):
    """Pick the most capable updater for the available devices.

    Prefers NCCL-backed multiprocess parallelism, falls back to
    single-process ParallelUpdater (scaling the learning rate down by
    the device count), and finally to a plain StandardUpdater.
    """
    n_devices = len(devices)
    if HAVE_NCCL and n_devices > 1:
        return training.updaters.MultiprocessParallelUpdater(
            train_iter, optimizer, devices=devices)
    if n_devices > 1:
        # Single-process data parallelism: compensate lr for averaging.
        optimizer.lr /= n_devices
        return training.ParallelUpdater(
            train_iter, optimizer, devices=devices)
    return training.StandardUpdater(
        train_iter, optimizer, device=devices['main'])
def create_updater(train_iter, optimizer, device):
    """Wrap iterator and optimizer into a single-device StandardUpdater."""
    return training.StandardUpdater(train_iter, optimizer, device=device)
def main(config_file):
    """Train an MLP classifier on features from a pretrained CNN extractor.

    Args:
        config_file: path to a JSON config with ``feature_extractor`` and
            ``classifier`` sections (model names, sizes, dataset paths,
            device, epoch count and output file).
    """
    with open(config_file) as fp:
        conf = json.load(fp)
    fe_conf = conf['feature_extractor']
    cl_conf = conf['classifier']
    # Rebuild the feature extractor by name and load its trained weights.
    fe_class = getattr(cnn_feature_extractors, fe_conf['model'])
    feature_extractor = fe_class(n_classes=fe_conf['n_classes'], n_base_units=fe_conf['n_base_units'])
    chainer.serializers.load_npz(fe_conf['out_file'], feature_extractor)
    model = classifiers.MLPClassifier(cl_conf['n_classes'], feature_extractor)
    optimizer = chainer.optimizers.Adam()
    optimizer.setup(model)
    device = cl_conf.get('device', -1)
    train_dataset = feature_dataset(os.path.join(cl_conf['dataset_path'], 'train'), model)
    train_iter = chainer.iterators.SerialIterator(train_dataset, conf.get('batch_size', 1))
    updater = chainer.training.StandardUpdater(train_iter, optimizer, device=device)
    trainer = chainer.training.Trainer(updater, (cl_conf['epoch'], 'epoch'), out='out_re')
    trainer.extend(extensions.dump_graph('main/loss'))
    trainer.extend(extensions.LogReport())
    trainer.extend(extensions.ProgressBar(update_interval=10))
    # Validation is optional: only wire it up if a test split exists.
    test_dataset_path = os.path.join(cl_conf['dataset_path'], 'test')
    if os.path.exists(test_dataset_path):
        test_dataset = feature_dataset(test_dataset_path, model)
        test_iter = chainer.iterators.SerialIterator(test_dataset, 10, repeat=False, shuffle=False)
        trainer.extend(extensions.Evaluator(test_iter, model, device=device))
        trainer.extend(extensions.PrintReport([
            'epoch', 'main/loss', 'validation/main/loss',
            'main/accuracy', 'validation/main/accuracy'
        ]))
    else:
        trainer.extend(extensions.PrintReport(['epoch', 'main/loss', 'main/accuracy']))
    trainer.run()
    chainer.serializers.save_npz(cl_conf['out_file'], model)