def __init__(self, sess, checkpoint_dir, log_dir, training_paths, testing_paths, roi, im_size, nclass,
             batch_size=1, layers=3, features_root=32, conv_size=3, dropout=0.5, testing_gt_available=True,
             loss_type='cross_entropy', class_weights=None):
    self.sess = sess
    self.checkpoint_dir = checkpoint_dir
    self.log_dir = log_dir
    self.training_paths = training_paths
    self.testing_paths = testing_paths
    self.testing_gt_available = testing_gt_available
    self.nclass = nclass
    self.im_size = im_size
    self.roi = roi  # (roi_order, roi_name)
    self.batch_size = batch_size
    self.layers = layers
    self.features_root = features_root
    self.conv_size = conv_size
    self.dropout = dropout
    self.loss_type = loss_type
    self.class_weights = class_weights

    self.build_model()
    self.saver = tf.train.Saver(tf.trainable_variables() + tf.get_collection_ref('bn_collections'))
Python tf.get_collection_ref() usage examples
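All of the snippets below turn on one property of the TF 1.x collections API: tf.get_collection returns a copy of a collection, while tf.get_collection_ref returns the graph's live list, so mutating the returned list edits the collection in place. A minimal self-contained sketch of the difference:

import tensorflow as tf

tf.add_to_collection('demo', 'a')
live = tf.get_collection_ref('demo')   # the collection's actual list
snap = tf.get_collection('demo')       # a shallow copy

live.append('b')                       # mutates the collection
snap.append('c')                       # leaves the collection untouched
print(tf.get_collection('demo'))       # ['a', 'b']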
def fit(self, X_train, y_train, X_valid, y_valid, X_test, y_test, steps=400):
    sess = tf.get_default_session()
    tf.global_variables_initializer().run()
    redirect = FDRedirector(STDERR)
    for i in range(steps):
        redirect.start()
        feed_dict = {self.labels: y_train}
        for key, tensor in self.features.items():
            feed_dict[tensor] = X_train[key]
        # Fetch the loss explicitly for logging; running only the train op
        # would return None for it.
        predictions, loss, _ = sess.run([self.prediction, self.loss, self.train_op],
                                        feed_dict=feed_dict)
        if i % 10 == 0:
            print("step:{} loss:{:.3g} np.std(predictions):{:.3g}".format(i, loss, np.std(predictions)))
            self.threshold = float(min(self.threshold_from_data(X_valid, y_valid),
                                       self.threshold_from_data(X_train, y_train)))
            # Overwrite the stored threshold in place through the live collection list.
            tf.get_collection_ref("threshold")[0] = self.threshold
            self.print_metrics(X_train, y_train, "Training")
            self.print_metrics(X_valid, y_valid, "Validation")
        errors = redirect.stop()
        if errors:
            print(errors)
    self.print_metrics(X_test, y_test, "Test")
def initialize_tbcnn_weights(clz):
    clz.initialize_embedding_weights()
    # Don't train We
    tf.get_collection_ref(tf.GraphKeys.TRAINABLE_VARIABLES).remove(clz.get('We'))
    clz.create_variable('Wcomb1', (hyper.word_dim, hyper.word_dim),
                        tf.random_uniform_initializer(-.2, .2))
    clz.create_variable('Wcomb2', (hyper.word_dim, hyper.word_dim),
                        tf.random_uniform_initializer(-.2, .2))
    clz.create_variable('Wconvt', (hyper.word_dim, hyper.conv_dim),
                        tf.random_uniform_initializer(-.2, .2))
    clz.create_variable('Wconvl', (hyper.word_dim, hyper.conv_dim),
                        tf.random_uniform_initializer(-.2, .2))
    clz.create_variable('Wconvr', (hyper.word_dim, hyper.conv_dim),
                        tf.random_uniform_initializer(-.2, .2))
    clz.create_variable('Bconv', (hyper.conv_dim,),
                        tf.random_uniform_initializer(-.2, .2))
    clz.create_variable('FC1/weight', (hyper.conv_dim, hyper.fc_dim),
                        tf.random_uniform_initializer(-.2, .2))
    clz.create_variable('FC1/bias', (hyper.fc_dim,),
                        tf.random_uniform_initializer(-.2, .2))
    clz.create_variable('FC2/weight', (hyper.fc_dim, hyper.output_dim),
                        tf.random_uniform_initializer(-.2, .2))
    clz.create_variable('FC2/bias', (hyper.output_dim,),
                        tf.random_uniform_initializer(-.2, .2))
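The removal at the top of initialize_tbcnn_weights is a general freeze pattern: because get_collection_ref hands back the live TRAINABLE_VARIABLES list, dropping a variable from it keeps that variable out of any optimizer var list built afterwards. A minimal sketch, where frozen_var stands in for any already-created tf.Variable:

trainables = tf.get_collection_ref(tf.GraphKeys.TRAINABLE_VARIABLES)
if frozen_var in trainables:       # frozen_var: hypothetical, created earlier
    trainables.remove(frozen_var)  # later optimizer.minimize() calls skip it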
def save_checkpoint(self, checkpoint_name):
    # Stash run artifacts in named collections; the slice assignment through
    # the live list replaces any previous contents in place.
    tf.get_collection_ref("threshold")[:] = [float(self.threshold)]
    tf.get_collection_ref("features")[:] = self.features.values()
    tf.get_collection_ref("loss")[:] = [self.loss]
    tf.get_collection_ref("prediction")[:] = [self.prediction]
    os.makedirs(os.path.dirname(checkpoint_name), exist_ok=True)
    saver = tf.train.Saver()
    saver.save(tf.get_default_session(), checkpoint_name)
    with open(os.path.join(os.path.dirname(checkpoint_name), "hparams.txt"), "w") as f:
        f.write(repr(self.hparams.__dict__))
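For context, a hedged sketch of the loading counterpart to save_checkpoint, assuming the same collection keys. import_meta_graph rebuilds the graph, after which tf.get_collection reads the stashed entries back; note that plain Python values such as the float threshold are not guaranteed to survive MetaGraph export, so treat this as illustrative only:

import tensorflow as tf

def load_checkpoint(checkpoint_name):
    sess = tf.Session()
    saver = tf.train.import_meta_graph(checkpoint_name + ".meta")
    saver.restore(sess, checkpoint_name)
    threshold = tf.get_collection("threshold")[0]   # may not round-trip
    features = tf.get_collection("features")
    loss = tf.get_collection("loss")[0]
    prediction = tf.get_collection("prediction")[0]
    return sess, threshold, features, loss, prediction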
def __init__(self, sess, checkpoint_dir, log_dir, training_paths, testing_paths,
             batch_size=1, layers=3, features_root=32, conv_size=3, dropout=0.5,
             loss_type='cross_entropy', class_weights=None):
    self.sess = sess
    self.checkpoint_dir = checkpoint_dir
    self.log_dir = log_dir
    self.training_paths = training_paths
    self.testing_paths = testing_paths

    image, _ = read_patch(os.path.join(self.training_paths[0], '0'))
    self.nclass = 4
    self.batch_size = batch_size
    self.patch_size = image.shape[:-1]
    self.patch_stride = 4  # Used in deploy
    self.channel = image.shape[-1]
    self.layers = layers
    self.features_root = features_root
    self.conv_size = conv_size
    self.dropout = dropout
    self.loss_type = loss_type
    self.class_weights = class_weights
    self.patches_per_image = len(os.listdir(self.training_paths[0]))

    self.build_model()
    self.saver = tf.train.Saver(tf.trainable_variables() + tf.get_collection_ref('bn_collections'))
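Both constructors above hand the Saver tf.trainable_variables() plus a custom 'bn_collections' key. Batch-norm moving statistics are not trainable, so the model-building code presumably registers them under that key; a speculative sketch of that producer side, with placeholder variable names:

# somewhere in build_model(), after the batch-norm variables are created:
tf.add_to_collection('bn_collections', moving_mean)      # hypothetical var
tf.add_to_collection('bn_collections', moving_variance)  # hypothetical var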
def variables_to_save(add_list=None):
    """Returns a list of variables to save.

    add_list variables are always added to the list.

    Args:
        add_list: a list of variables
    Returns:
        list: list of tensors to save
    """
    if add_list is None:
        add_list = []
    return tf.trainable_variables() + tf.get_collection_ref(
        REQUIRED_NON_TRAINABLES) + add_list + training_process_variables()
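A hypothetical call site for variables_to_save, assuming a global_step variable and an open session exist:

saver = tf.train.Saver(variables_to_save([global_step]))  # global_step: assumed
saver.save(sess, checkpoint_path)                          # sess, path: assumed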
def save(self, sess, path, var_list=None, global_step=None):
    # var_list = None returns the list of all saveable variables
    saver = tf.train.Saver(var_list)
    # temporary code
    # del tf.get_collection_ref('LAYER_NAME_UIDS')[0]
    save_path = saver.save(sess, save_path=path, global_step=global_step)
    print('model saved at %s' % save_path)
def store_to_attr(self, attr, graph_item):
    """ Make a graph item (variable or operation) accessible as a model attribute """
    with self.graph.as_default():
        setattr(self, attr, graph_item)
        self._attrs.append(attr)
        tf.get_collection_ref('attrs').append(graph_item)
def restore_collection(backup):
    for k, v in six.iteritems(backup):
        del tf.get_collection_ref(k)[:]
        tf.get_collection_ref(k).extend(v)


def clear_collection(keys):
    for k in keys:
        del tf.get_collection_ref(k)[:]
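restore_collection expects the dict produced by a matching backup helper that is not shown here. A plausible sketch of it, copying each collection so that a later clear_collection cannot touch the backup:

def backup_collection(keys):
    backup = {}
    for k in keys:
        backup[k] = list(tf.get_collection(k))  # a copy, not the live list
    return backup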
def variables_to_save(addlist):
    """Create a list of all trained variables and required variables of the model.

    Appends the addlist passed as argument to the result.

    Args:
        addlist: (list, of, variables, to, save)
    Returns:
        a list of variables"""
    return tf.trainable_variables() + tf.get_collection_ref(
        REQUIRED_NON_TRAINABLES) + addlist
def inference(video_id_batch, prediction_batch, label_batch, saver, out_file_location):
    global_step_val = -1
    with tf.Session() as sess:
        if FLAGS.model_checkpoint_path:
            checkpoint = FLAGS.model_checkpoint_path
        else:
            checkpoint = tf.train.latest_checkpoint(FLAGS.train_dir)
        if checkpoint:
            logging.info("Loading checkpoint for eval: " + checkpoint)
            # Restores from checkpoint
            saver.restore(sess, checkpoint)
            # Assuming model_checkpoint_path looks something like:
            # /my-favorite-path/yt8m_train/model.ckpt-0, extract global_step from it.
            global_step_val = checkpoint.split("/")[-1].split("-")[-1]
        else:
            logging.info("No checkpoint file found.")
            return global_step_val

        sess.run([tf.local_variables_initializer()])

        # Workaround for num_epochs issue: pin the 'train_input' epoch counters
        # to 1 instead of re-initializing them, then initialize the remaining
        # local variables.
        def set_up_init_ops(variables):
            init_op_list = []
            for variable in list(variables):
                if "train_input" in variable.name:
                    init_op_list.append(tf.assign(variable, 1))
                    variables.remove(variable)
            init_op_list.append(tf.variables_initializer(variables))
            return init_op_list

        sess.run(set_up_init_ops(tf.get_collection_ref(
            tf.GraphKeys.LOCAL_VARIABLES)))

        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)
        input_indices = np.eye(4716)

        try:
            print("start saving parameters")
            predictions = sess.run(prediction_batch, feed_dict={label_batch: input_indices})
            np.savetxt(out_file_location, predictions)
        except tf.errors.OutOfRangeError:
            logging.info('Done with inference. The output file was written to ' + out_file_location)
        finally:
            coord.request_stop()
            coord.join(threads)
        sess.close()
def inference(reader, train_dir, data_pattern, out_file_location, batch_size, top_k):
    with tf.Session() as sess:
        video_id_batch, video_batch, video_label_batch, num_frames_batch = get_input_data_tensors(reader, data_pattern, batch_size)

        # Workaround for num_epochs issue.
        def set_up_init_ops(variables):
            init_op_list = []
            for variable in list(variables):
                if "train_input" in variable.name:
                    init_op_list.append(tf.assign(variable, 1))
                    variables.remove(variable)
            init_op_list.append(tf.variables_initializer(variables))
            return init_op_list

        sess.run(set_up_init_ops(tf.get_collection_ref(
            tf.GraphKeys.LOCAL_VARIABLES)))

        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)
        num_examples_processed = 0
        start_time = time.time()
        # out_file.write("VideoId,LabelConfidencePairs\n")
        filenum = 0
        video_id = []
        try:
            while not coord.should_stop():
                video_id_batch_val = sess.run(video_id_batch)
                video_id.extend(video_id_batch_val)
                now = time.time()
                num_examples_processed += len(video_id_batch_val)
                if num_examples_processed >= FLAGS.file_size:
                    if num_examples_processed > FLAGS.file_size:
                        print("Wrong!", num_examples_processed)
                    else:
                        print(num_examples_processed)
                        # logging.info("num examples processed: " + str(num_examples_processed) + " elapsed seconds: " + "{0:.2f}".format(now-start_time))
                        """
                        thefile = open('inference_test/video_id_test_'+str(filenum)+'.out', 'w')
                        for item in video_id:
                            item = ''.join(str(e) for e in item)
                            thefile.write("%s\n" % item)"""
                        filenum += 1
                        video_id = []
                        num_examples_processed = 0
        except tf.errors.OutOfRangeError:
            logging.info('Done with inference. The output file was written to ' + out_file_location)
        finally:
            coord.request_stop()
            if num_examples_processed < FLAGS.file_size:
                print(num_examples_processed)
                thefile = open('inference_test/video_id_test_'+str(filenum)+'.out', 'w')
                for item in video_id:
                    item = ''.join(str(e) for e in item)
                    thefile.write("%s\n" % item)
            coord.join(threads)
        sess.close()
def inference(reader, train_dir, data_pattern, out_file_location, batch_size, top_k):
    with tf.Session() as sess, gfile.Open(out_file_location, "w+") as out_file:
        video_id_batch, video_batch, num_frames_batch = get_input_data_tensors(reader, data_pattern, batch_size)
        if FLAGS.model_checkpoint_path:
            latest_checkpoint = FLAGS.model_checkpoint_path
        else:
            latest_checkpoint = tf.train.latest_checkpoint(train_dir)
        if latest_checkpoint is None:
            raise Exception("unable to find a checkpoint at location: %s" % train_dir)
        else:
            meta_graph_location = latest_checkpoint + ".meta"
            logging.info("loading meta-graph: " + meta_graph_location)
        saver = tf.train.import_meta_graph(meta_graph_location, clear_devices=True)
        logging.info("restoring variables from " + latest_checkpoint)
        saver.restore(sess, latest_checkpoint)

        parameters = get_forward_parameters(vocab_size=reader.num_classes)

        # Workaround for num_epochs issue.
        def set_up_init_ops(variables):
            init_op_list = []
            for variable in list(variables):
                if "train_input" in variable.name:
                    init_op_list.append(tf.assign(variable, 1))
                    variables.remove(variable)
            init_op_list.append(tf.variables_initializer(variables))
            return init_op_list

        sess.run(set_up_init_ops(tf.get_collection_ref(
            tf.GraphKeys.LOCAL_VARIABLES)))

        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)
        try:
            print("start saving parameters")
            params = sess.run(parameters)
            print(params)
            for i in range(len(params)):
                np.savetxt(FLAGS.train_dir + '/autoencoder_layer%d.model' % i, params[i])
        except tf.errors.OutOfRangeError:
            logging.info('Done with inference. The output file was written to ' + out_file_location)
        finally:
            coord.request_stop()
            coord.join(threads)
        sess.close()
def inference(reader, train_dir, data_pattern, out_file_location, batch_size, top_k):
    with tf.Session() as sess, gfile.Open(out_file_location, "w+") as out_file:
        video_id_batch, video_batch, num_frames_batch = get_input_data_tensors(reader, data_pattern, batch_size)
        if FLAGS.model_checkpoint_path:
            latest_checkpoint = FLAGS.model_checkpoint_path
        else:
            latest_checkpoint = tf.train.latest_checkpoint(train_dir)
        if latest_checkpoint is None:
            raise Exception("unable to find a checkpoint at location: %s" % train_dir)
        else:
            meta_graph_location = latest_checkpoint + ".meta"
            logging.info("loading meta-graph: " + meta_graph_location)
        saver = tf.train.import_meta_graph(meta_graph_location, clear_devices=True)
        logging.info("restoring variables from " + latest_checkpoint)
        saver.restore(sess, latest_checkpoint)

        input_tensor = tf.get_collection("input_batch_raw")[0]
        num_frames_tensor = tf.get_collection("num_frames")[0]
        predictions_tensor = tf.get_collection("predictions")[0]

        # Workaround for num_epochs issue.
        def set_up_init_ops(variables):
            init_op_list = []
            for variable in list(variables):
                if "train_input" in variable.name:
                    init_op_list.append(tf.assign(variable, 1))
                    variables.remove(variable)
            init_op_list.append(tf.variables_initializer(variables))
            return init_op_list

        sess.run(set_up_init_ops(tf.get_collection_ref(
            tf.GraphKeys.LOCAL_VARIABLES)))

        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)
        num_examples_processed = 0
        start_time = time.time()
        out_file.write("VideoId,LabelConfidencePairs\n")

        try:
            while not coord.should_stop():
                video_id_batch_val, video_batch_val, num_frames_batch_val = sess.run([video_id_batch, video_batch, num_frames_batch])
                predictions_val, = sess.run([predictions_tensor], feed_dict={input_tensor: video_batch_val, num_frames_tensor: num_frames_batch_val})
                now = time.time()
                num_examples_processed += len(video_batch_val)
                num_classes = predictions_val.shape[1]
                logging.info("num examples processed: " + str(num_examples_processed) + " elapsed seconds: " + "{0:.2f}".format(now - start_time))
                for line in format_lines(video_id_batch_val, predictions_val, top_k):
                    out_file.write(line)
                out_file.flush()
        except tf.errors.OutOfRangeError:
            logging.info('Done with inference. The output file was written to ' + out_file_location)
        finally:
            coord.request_stop()
            coord.join(threads)
        sess.close()
def inference(reader, checkpoint_file, train_dir, data_pattern, out_file_location, batch_size, top_k):
    with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess, gfile.Open(out_file_location, "w+") as out_file:
        video_id_batch, video_batch, num_frames_batch = get_input_data_tensors(reader, data_pattern, batch_size)
        if checkpoint_file:
            if not gfile.Exists(checkpoint_file + ".meta"):
                logging.fatal("Unable to find checkpoint file at provided location '%s'" % checkpoint_file)
            latest_checkpoint = checkpoint_file
        else:
            latest_checkpoint = tf.train.latest_checkpoint(train_dir)
        if latest_checkpoint is None:
            raise Exception("unable to find a checkpoint at location: %s" % train_dir)
        else:
            meta_graph_location = latest_checkpoint + ".meta"
            logging.info("loading meta-graph: " + meta_graph_location)
        saver = tf.train.import_meta_graph(meta_graph_location, clear_devices=True)
        logging.info("restoring variables from " + latest_checkpoint)
        saver.restore(sess, latest_checkpoint)

        input_tensor = tf.get_collection("input_batch_raw")[0]
        num_frames_tensor = tf.get_collection("num_frames")[0]
        predictions_tensor = tf.get_collection("predictions")[0]

        # Workaround for num_epochs issue.
        def set_up_init_ops(variables):
            init_op_list = []
            for variable in list(variables):
                if "train_input" in variable.name:
                    init_op_list.append(tf.assign(variable, 1))
                    variables.remove(variable)
            init_op_list.append(tf.variables_initializer(variables))
            return init_op_list

        sess.run(set_up_init_ops(tf.get_collection_ref(
            tf.GraphKeys.LOCAL_VARIABLES)))

        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)
        num_examples_processed = 0
        start_time = time.time()
        out_file.write("VideoId,LabelConfidencePairs\n")

        try:
            while not coord.should_stop():
                video_id_batch_val, video_batch_val, num_frames_batch_val = sess.run([video_id_batch, video_batch, num_frames_batch])
                predictions_val, = sess.run([predictions_tensor], feed_dict={input_tensor: video_batch_val, num_frames_tensor: num_frames_batch_val})
                now = time.time()
                num_examples_processed += len(video_batch_val)
                num_classes = predictions_val.shape[1]
                logging.info("num examples processed: " + str(num_examples_processed) + " elapsed seconds: " + "{0:.2f}".format(now - start_time))
                for line in format_lines(video_id_batch_val, predictions_val, top_k):
                    out_file.write(line)
                out_file.flush()
        except tf.errors.OutOfRangeError:
            logging.info('Done with inference. The output file was written to ' + out_file_location)
        finally:
            coord.request_stop()
            coord.join(threads)
        sess.close()
# video_label_prediction.py, from project video_labelling_using_youtube8m (author: LittleWat)
def print_predicted_label(feature, topn=10, latest_checkpoint='./yt8m_model/model.ckpt-2833',
                          id2label_csv='./label_names.csv'):
    id2label_ser = pd.read_csv(id2label_csv, index_col=0)
    id2label = id2label_ser.to_dict()['label_name']

    meta_graph_location = latest_checkpoint + ".meta"
    sess = tf.InteractiveSession()
    saver = tf.train.import_meta_graph(meta_graph_location, clear_devices=True)
    saver.restore(sess, latest_checkpoint)

    input_tensor = tf.get_collection("input_batch_raw")[0]
    num_frames_tensor = tf.get_collection("num_frames")[0]
    predictions_tensor = tf.get_collection("predictions")[0]

    # Workaround for num_epochs issue.
    def set_up_init_ops(variables):
        init_op_list = []
        for variable in list(variables):
            if "train_input" in variable.name:
                init_op_list.append(tf.assign(variable, 1))
                variables.remove(variable)
        init_op_list.append(tf.variables_initializer(variables))
        return init_op_list

    sess.run(set_up_init_ops(tf.get_collection_ref(
        tf.GraphKeys.LOCAL_VARIABLES)))

    padded_feature = np.zeros([300, 1024])
    padded_feature[:feature.shape[0], :] = Dequantize(feature)
    video_batch_val = padded_feature[np.newaxis, :, :].astype(np.float32)
    num_frames_batch_val = np.array([feature.shape[0]], dtype=np.int32)

    predictions_val, = sess.run([predictions_tensor], feed_dict={input_tensor: video_batch_val,
                                                                 num_frames_tensor: num_frames_batch_val})
    predictions_val = predictions_val.flatten()

    top_idxes = np.argsort(predictions_val)[::-1][:topn]
    pprint.pprint([(id2label[x], predictions_val[x]) for x in top_idxes])
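A hypothetical call, assuming feature holds the quantized (num_frames, 1024) frame-level matrix read from a single YouTube-8M tfrecord:

print_predicted_label(feature, topn=5)  # prints [(label_name, score), ...]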
def inference(reader, train_dir, data_pattern, out_file_location, batch_size):
    with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess, gfile.Open(out_file_location, "w+") as out_file:
        image_id_batch, image_batch = get_input_data_tensors(reader, data_pattern, batch_size)
        latest_checkpoint = tf.train.latest_checkpoint(train_dir)
        if latest_checkpoint is None:
            raise Exception("unable to find a checkpoint at location: %s" % train_dir)
        else:
            meta_graph_location = latest_checkpoint + ".meta"
            logging.info("loading meta-graph: " + meta_graph_location)
        saver = tf.train.import_meta_graph(meta_graph_location, clear_devices=True)
        logging.info("restoring variables from " + latest_checkpoint)
        saver.restore(sess, latest_checkpoint)

        input_tensor = tf.get_collection("input_batch_raw")[0]
        predictions_tensor = tf.get_collection("predictions")[0]

        # Workaround for num_epochs issue.
        def set_up_init_ops(variables):
            init_op_list = []
            for variable in list(variables):
                if "train_input" in variable.name:
                    init_op_list.append(tf.assign(variable, 1))
                    variables.remove(variable)
            init_op_list.append(tf.variables_initializer(variables))
            return init_op_list

        sess.run(set_up_init_ops(tf.get_collection_ref(
            tf.GraphKeys.LOCAL_VARIABLES)))

        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)
        num_examples_processed = 0
        start_time = time.time()
        out_file.write("Id,Category\n")

        try:
            while not coord.should_stop():
                image_id_batch_val, image_batch_val = sess.run([image_id_batch, image_batch])
                predictions_val, = sess.run([predictions_tensor], feed_dict={input_tensor: image_batch_val})
                now = time.time()
                num_examples_processed += len(image_batch_val)
                num_classes = predictions_val.shape[1]
                logging.info("num examples processed: " + str(num_examples_processed) + " elapsed seconds: " + "{0:.2f}".format(now - start_time))
                for line in format_lines(image_id_batch_val, predictions_val):
                    out_file.write(line)
                out_file.flush()
        except tf.errors.OutOfRangeError:
            logging.info('Done with inference. The output file was written to ' + out_file_location)
        finally:
            coord.request_stop()
            coord.join(threads)
        sess.close()
def inference(reader, train_dir, data_pattern, out_file_location, batch_size, top_k):
    with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess, gfile.Open(out_file_location, "w+") as out_file:
        video_id_batch, video_batch, num_frames_batch = get_input_data_tensors(reader, data_pattern, batch_size)
        latest_checkpoint = tf.train.latest_checkpoint(train_dir)
        if latest_checkpoint is None:
            raise Exception("unable to find a checkpoint at location: %s" % train_dir)
        else:
            meta_graph_location = latest_checkpoint + ".meta"
            logging.info("loading meta-graph: " + meta_graph_location)
        saver = tf.train.import_meta_graph(meta_graph_location, clear_devices=True)
        logging.info("restoring variables from " + latest_checkpoint)
        saver.restore(sess, latest_checkpoint)

        input_tensor = tf.get_collection("input_batch_raw")[0]
        num_frames_tensor = tf.get_collection("num_frames")[0]
        predictions_tensor = tf.get_collection("predictions")[0]

        # Workaround for num_epochs issue.
        def set_up_init_ops(variables):
            init_op_list = []
            for variable in list(variables):
                if "train_input" in variable.name:
                    init_op_list.append(tf.assign(variable, 1))
                    variables.remove(variable)
            init_op_list.append(tf.variables_initializer(variables))
            return init_op_list

        sess.run(set_up_init_ops(tf.get_collection_ref(
            tf.GraphKeys.LOCAL_VARIABLES)))

        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)
        num_examples_processed = 0
        start_time = time.time()
        out_file.write("VideoId,LabelConfidencePairs\n")

        try:
            while not coord.should_stop():
                video_id_batch_val, video_batch_val, num_frames_batch_val = sess.run([video_id_batch, video_batch, num_frames_batch])
                predictions_val, = sess.run([predictions_tensor], feed_dict={input_tensor: video_batch_val, num_frames_tensor: num_frames_batch_val})
                now = time.time()
                num_examples_processed += len(video_batch_val)
                num_classes = predictions_val.shape[1]
                logging.info("num examples processed: " + str(num_examples_processed) + " elapsed seconds: " + "{0:.2f}".format(now - start_time))
                for line in format_lines(video_id_batch_val, predictions_val, top_k):
                    out_file.write(line)
                out_file.flush()
        except tf.errors.OutOfRangeError:
            logging.info('Done with inference. The output file was written to ' + out_file_location)
        finally:
            coord.request_stop()
            coord.join(threads)
        sess.close()
def fix_saver(collection_lists=None):
    # Workaround to prevent serialization warning by removing objects
    if collection_lists is None:
        try:
            # Try latest api
            l = tf.get_collection_ref("summary_tags")
            l4 = tf.get_collection_ref(tf.GraphKeys.GRAPH_CONFIG)
        except Exception:
            l = tf.get_collection("summary_tags")
            l4 = tf.get_collection(tf.GraphKeys.GRAPH_CONFIG)
        l_stags = list(l)
        l4_stags = list(l4)
        del l[:]
        del l4[:]

        try:
            # Try latest api
            l1 = tf.get_collection_ref(tf.GraphKeys.DATA_PREP)
            l2 = tf.get_collection_ref(tf.GraphKeys.DATA_AUG)
        except Exception:
            l1 = tf.get_collection(tf.GraphKeys.DATA_PREP)
            l2 = tf.get_collection(tf.GraphKeys.DATA_AUG)
        l1_dtags = list(l1)
        l2_dtags = list(l2)
        del l1[:]
        del l2[:]

        try:  # Do not save exclude variables
            l3 = tf.get_collection_ref(tf.GraphKeys.EXCL_RESTORE_VARS)
        except Exception:
            l3 = tf.get_collection(tf.GraphKeys.EXCL_RESTORE_VARS)
        l3_tags = list(l3)
        del l3[:]
        return [l_stags, l1_dtags, l2_dtags, l3_tags, l4_stags]
    else:
        # 0.7+ workaround, restore values
        for t in collection_lists[0]:
            tf.add_to_collection("summary_tags", t)
        for t in collection_lists[4]:
            tf.add_to_collection(tf.GraphKeys.GRAPH_CONFIG, t)
        for t in collection_lists[1]:
            tf.add_to_collection(tf.GraphKeys.DATA_PREP, t)
        for t in collection_lists[2]:
            tf.add_to_collection(tf.GraphKeys.DATA_AUG, t)
        for t in collection_lists[3]:
            tf.add_to_collection(tf.GraphKeys.EXCL_RESTORE_VARS, t)
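A hedged usage sketch for fix_saver: snapshot and strip the fragile collections, save, then put them back so the in-memory graph is left unchanged (saver, session, and model_file are assumed to exist):

backup = fix_saver()             # strip objects that trip up serialization
saver.save(session, model_file)
fix_saver(backup)                # restore the stripped collection contents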
def inference(reader, train_dir, data_pattern, out_file_location, batch_size, top_k):
    with tf.Session() as sess, gfile.Open(out_file_location, "w+") as out_file:
        video_id_batch, video_batch, num_frames_batch = get_input_data_tensors(reader, data_pattern, batch_size)
        latest_checkpoint = tf.train.latest_checkpoint(train_dir)
        if latest_checkpoint is None:
            raise Exception("unable to find a checkpoint at location: %s" % train_dir)
        else:
            if FLAGS.check_point < 0:
                meta_graph_location = latest_checkpoint + ".meta"
            else:
                meta_graph_location = FLAGS.train_dir + "/model.ckpt-" + str(FLAGS.check_point) + ".meta"
                latest_checkpoint = FLAGS.train_dir + "/model.ckpt-" + str(FLAGS.check_point)
            logging.info("loading meta-graph: " + meta_graph_location)
        saver = tf.train.import_meta_graph(meta_graph_location, clear_devices=True)
        logging.info("restoring variables from " + latest_checkpoint)
        saver.restore(sess, latest_checkpoint)

        input_tensor = tf.get_collection("input_batch_raw")[0]
        num_frames_tensor = tf.get_collection("num_frames")[0]
        predictions_tensor = tf.get_collection("predictions")[0]

        # Workaround for num_epochs issue.
        def set_up_init_ops(variables):
            init_op_list = []
            for variable in list(variables):
                if "train_input" in variable.name:
                    init_op_list.append(tf.assign(variable, 1))
                    variables.remove(variable)
            init_op_list.append(tf.variables_initializer(variables))
            return init_op_list

        sess.run(set_up_init_ops(tf.get_collection_ref(
            tf.GraphKeys.LOCAL_VARIABLES)))

        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)
        num_examples_processed = 0
        start_time = time.time()
        out_file.write("VideoId,LabelConfidencePairs\n")

        try:
            while not coord.should_stop():
                video_id_batch_val, video_batch_val, num_frames_batch_val = sess.run([video_id_batch, video_batch, num_frames_batch])
                predictions_val, = sess.run([predictions_tensor], feed_dict={input_tensor: video_batch_val, num_frames_tensor: num_frames_batch_val})
                now = time.time()
                num_examples_processed += len(video_batch_val)
                num_classes = predictions_val.shape[1]
                logging.info("num examples processed: " + str(num_examples_processed) + " elapsed seconds: " + "{0:.2f}".format(now - start_time))
                for line in format_lines(video_id_batch_val, predictions_val, top_k):
                    out_file.write(line)
                out_file.flush()
        except tf.errors.OutOfRangeError:
            logging.info('Done with inference. The output file was written to ' + out_file_location)
        finally:
            coord.request_stop()
            coord.join(threads)
        sess.close()
def __init__(self, fn, args):
    def _decompose_tensors(tensor_list):
        result = []
        for tensor in tensor_list:
            if isinstance(tensor, tf.SparseTensor):
                result.append(tensor.indices)
                result.append(tensor.values)
                result.append(tensor.dense_shape)
            else:
                result.append(tensor)
        return result

    def _copy_tensor(tensor):
        if isinstance(tensor, tf.SparseTensor):
            return tf.SparseTensor(
                tf.identity(tensor.indices),
                tf.identity(tensor.values),
                tf.identity(tensor.dense_shape))
        else:
            return tf.identity(tensor)

    # Apply fn to its args, keeping track of any table initializers that are
    # added while fn is running, and also checking that no analyzers are added
    # while fn is running.
    all_table_initializers = tf.get_collection_ref(
        tf.GraphKeys.TABLE_INITIALIZERS)
    all_analyzers = tf.get_collection_ref(analyzers.ANALYZER_COLLECTION)
    original_num_table_initializers = len(all_table_initializers)
    original_num_analyzers = len(all_analyzers)

    output = fn(*args)

    if len(all_analyzers) != original_num_analyzers:
        raise ValueError(
            'One or more `Analyzer`s were created while inside '
            'FunctionApplication.__init__')

    # Set inputs and outputs of this op, flattening inputs and outputs into a
    # list of tensors, but storing outputs in the original format for the return
    # value of `apply_function`.
    self._table_initializers = all_table_initializers[
        original_num_table_initializers:]
    self._inputs = _decompose_tensors(args)
    # When traversing the graph, there isn't a clean way to handle `Map`s whose
    # inputs and outputs overlap. Therefore we apply tf.identity to all outputs
    # to ensure the outputs and inputs don't overlap.
    if isinstance(output, tuple):
        self._user_output = [_copy_tensor(tensor) for tensor in output]
        self._outputs = _decompose_tensors(self._user_output)
    else:
        self._user_output = _copy_tensor(output)
        self._outputs = _decompose_tensors([self._user_output])

    tf.add_to_collection(FUNCTION_APPLICATION_COLLECTION, self)
def loadEmbedding(self, sess):
    """ Initialize embeddings with pre-trained word2vec vectors
    Will modify the embedding weights of the current loaded model
    Uses the GoogleNews pre-trained values (path hardcoded)
    """
    # Fetch embedding variables from model
    with tf.variable_scope("embedding_rnn_seq2seq/rnn/embedding_wrapper", reuse=True):
        em_in = tf.get_variable("embedding")
    with tf.variable_scope("embedding_rnn_seq2seq/embedding_rnn_decoder", reuse=True):
        em_out = tf.get_variable("embedding")

    # Disable training for embeddings
    variables = tf.get_collection_ref(tf.GraphKeys.TRAINABLE_VARIABLES)
    variables.remove(em_in)
    variables.remove(em_out)

    # If restoring a model, we can leave here
    if self.globStep != 0:
        return

    # New model, we load the pre-trained word2vec data and initialize embeddings
    with open(os.path.join(self.args.rootDir, 'data/word2vec/GoogleNews-vectors-negative300.bin'), "rb", 0) as f:
        header = f.readline()
        vocab_size, vector_size = map(int, header.split())
        binary_len = np.dtype('float32').itemsize * vector_size
        initW = np.random.uniform(-0.25, 0.25, (len(self.textData.word2id), vector_size))
        for line in tqdm(range(vocab_size)):
            word = []
            while True:
                ch = f.read(1)
                if ch == b' ':
                    word = b''.join(word).decode('utf-8')
                    break
                if ch != b'\n':
                    word.append(ch)
            if word in self.textData.word2id:
                initW[self.textData.word2id[word]] = np.fromstring(f.read(binary_len), dtype='float32')
            else:
                f.read(binary_len)

    # PCA Decomposition to reduce word2vec dimensionality
    if self.args.embeddingSize < vector_size:
        U, s, Vt = np.linalg.svd(initW, full_matrices=False)
        S = np.zeros((vector_size, vector_size), dtype=complex)
        S[:vector_size, :vector_size] = np.diag(s)
        initW = np.dot(U[:, :self.args.embeddingSize], S[:self.args.embeddingSize, :self.args.embeddingSize])

    # Initialize input and output embeddings
    sess.run(em_in.assign(initW))
    sess.run(em_out.assign(initW))
def add_role(variables, roles):
    r"""Add a role to the given variables.

    Parameters
    ----------
    variables : :class:`~tensor.TensorVariable`
        The variable(s) to assign the new roles to.
    roles : :subclass:`Role`
        These roles will be concatenated with the current roles scope.

    Notes
    -----
    Some roles are subroles of others (e.g. :class:`Weight` is a subrole
    of :class:`Parameter`). This function will not add a role if a more
    specific role has already been added. If you need to replace a role
    with a parent role (e.g. replace :class:`Weight` with
    :class:`Parameter`) you must do so manually.
    """
    if roles is None:
        return variables
    roles = tuple([name_to_roles(r) for r in as_tuple(roles)])
    # create tag attribute for variable
    for var in as_tuple(variables):
        # append roles scope
        var_roles = get_roles(var, return_string=False) + \
            roles + \
            get_current_role_scope()
        # ====== handle string roles first ====== #
        _ = []
        for r in var_roles:
            if isinstance(r, string_types):
                _add_to_collection_no_duplication(r, var)
            elif isinstance(r, type) and issubclass(r, Role):
                _.append(r)
        var_roles = _
        # ====== shrink the roles so there is NO subrole ====== #
        new_roles = []
        for r in var_roles:
            if any(r != r0 and issubclass(r0, r) for r0 in var_roles):
                tf.get_collection_ref(r.__name__).remove(var)
            else:
                new_roles.append(r)
        # ====== adding new role ====== #
        for r in new_roles:
            _add_to_collection_no_duplication(r.__name__, var)
    return variables