def testVariablesPS(self):
    """Check variable placement for a deployment with two PS tasks.

    Two plain variables, a no-op and a slim variable with a caching device
    are created under ``variables_device()``; the assertions then pin the
    device each of them (and each variable read) is expected to report.
    """
    config = model_deploy.DeploymentConfig(num_ps_tasks=2)

    with tf.device(config.variables_device()):
        first_var = tf.Variable(0)
        second_var = tf.Variable(0)
        plain_op = tf.no_op()
        cached_var = slim.variable(
            'a', [], caching_device=config.caching_device())

    # The two plain variables are expected on PS task 0 and PS task 1
    # respectively, and reading each one must report the same device.
    expected_placements = (
        (first_var, '/job:ps/task:0/device:CPU:0'),
        (second_var, '/job:ps/task:1/device:CPU:0'),
    )
    for variable, expected_device in expected_placements:
        self.assertDeviceEqual(variable.device, expected_device)
        self.assertDeviceEqual(variable.device, variable.value().device)

    # Non-variable ops are expected to carry no device.
    self.assertDeviceEqual(plain_op.device, '')

    # The slim variable is expected on task 0, with an unpinned read.
    self.assertDeviceEqual(cached_var.device, '/job:ps/task:0/device:CPU:0')
    self.assertDeviceEqual(cached_var.value().device, '')
# Example source code for Python's device()
def testCreateSingleclone(self):
    """Create one clone without PS tasks and check what it produced.

    Verifies the number and device of the created variables, the clone's
    output op name, scope and device, and the number of losses and
    update ops registered in the graph.
    """
    graph = tf.Graph()
    with graph.as_default():
        tf.set_random_seed(0)
        inputs = tf.constant(self._inputs, dtype=tf.float32)
        labels = tf.constant(self._labels, dtype=tf.float32)
        config = model_deploy.DeploymentConfig(num_clones=1)

        # Nothing may exist in the graph before the clones are created.
        self.assertEqual(slim.get_variables(), [])
        clones = model_deploy.create_clones(
            config, BatchNormClassifier, (inputs, labels))
        only_clone = clones[0]

        created_variables = slim.get_variables()
        self.assertEqual(len(created_variables), 5)
        for variable in created_variables:
            self.assertDeviceEqual(variable.device, 'CPU:0')
            self.assertDeviceEqual(variable.value().device, 'CPU:0')

        self.assertEqual(only_clone.outputs.op.name,
                         'BatchNormClassifier/fully_connected/Sigmoid')
        self.assertEqual(only_clone.scope, '')
        self.assertDeviceEqual(only_clone.device, '')

        self.assertEqual(len(slim.losses.get_losses()), 1)
        self.assertEqual(
            len(tf.get_collection(tf.GraphKeys.UPDATE_OPS)), 2)
def testCreateOnecloneWithPS(self):
    """Create one clone with a single PS task and check device placement.

    The clone itself is expected on the worker job, while every created
    variable (and its read) is expected on the CPU of PS task 0.
    """
    graph = tf.Graph()
    with graph.as_default():
        tf.set_random_seed(0)
        inputs = tf.constant(self._inputs, dtype=tf.float32)
        labels = tf.constant(self._labels, dtype=tf.float32)
        config = model_deploy.DeploymentConfig(num_clones=1,
                                               num_ps_tasks=1)

        # Nothing may exist in the graph before the clones are created.
        self.assertEqual(slim.get_variables(), [])
        clones = model_deploy.create_clones(
            config, BatchNormClassifier, (inputs, labels))
        self.assertEqual(len(clones), 1)

        only_clone = clones[0]
        self.assertEqual(only_clone.outputs.op.name,
                         'BatchNormClassifier/fully_connected/Sigmoid')
        self.assertDeviceEqual(only_clone.device, '/job:worker')
        self.assertEqual(only_clone.scope, '')

        created_variables = slim.get_variables()
        self.assertEqual(len(created_variables), 5)
        for variable in created_variables:
            self.assertDeviceEqual(variable.device, '/job:ps/task:0/CPU:0')
            self.assertDeviceEqual(variable.device, variable.value().device)
def testCreateLogisticClassifier(self):
    """Optimize a single LogisticClassifier clone and check the result.

    Verifies the variable count, that no update ops were registered, that
    one (gradient, variable) pair exists per trainable variable, the name
    of the total loss op, and the devices of gradients and variables.
    """
    graph = tf.Graph()
    with graph.as_default():
        tf.set_random_seed(0)
        inputs = tf.constant(self._inputs, dtype=tf.float32)
        labels = tf.constant(self._labels, dtype=tf.float32)
        config = model_deploy.DeploymentConfig(num_clones=1)

        # Nothing may exist in the graph before the clones are created.
        self.assertEqual(slim.get_variables(), [])
        clones = model_deploy.create_clones(
            config, LogisticClassifier, (inputs, labels))

        self.assertEqual(len(slim.get_variables()), 2)
        self.assertEqual(tf.get_collection(tf.GraphKeys.UPDATE_OPS), [])

        sgd = tf.train.GradientDescentOptimizer(learning_rate=1.0)
        total_loss, grads_and_vars = model_deploy.optimize_clones(clones, sgd)

        self.assertEqual(len(grads_and_vars), len(tf.trainable_variables()))
        self.assertEqual(total_loss.op.name, 'total_loss')
        for gradient, variable in grads_and_vars:
            self.assertDeviceEqual(gradient.device, '')
            self.assertDeviceEqual(variable.device, 'CPU:0')
def testNoSummariesOnGPU(self):
    """Deploy two clones with an optimizer; summary inputs must be on CPU."""
    with tf.Graph().as_default():
        config = model_deploy.DeploymentConfig(num_clones=2)

        def ModelFn():
            # Clone function: one fully_connected layer carrying an L2
            # regularizer, so a regularization loss is created.
            inputs = tf.constant(1.0, shape=(10, 20), dtype=tf.float32)
            regularizer = tf.contrib.layers.l2_regularizer(0.001)
            tf.contrib.layers.fully_connected(
                inputs, 30, weights_regularizer=regularizer)

        sgd = tf.train.GradientDescentOptimizer(1.0)
        model = model_deploy.deploy(config, ModelFn, optimizer=sgd)

        # The summary op must have at least one input, and every input is
        # expected to sit on the CPU.
        self.assertTrue(model.summary_op.op.inputs)
        for summary_input in model.summary_op.op.inputs:
            self.assertEqual('/device:CPU:0', summary_input.device)
def testNoSummariesOnGPUForEvals(self):
    """Deploy two clones without an optimizer; summary inputs must be on CPU."""
    with tf.Graph().as_default():
        config = model_deploy.DeploymentConfig(num_clones=2)

        def ModelFn():
            # Clone function: one fully_connected layer carrying an L2
            # regularizer, so a regularization loss is created.
            inputs = tf.constant(1.0, shape=(10, 20), dtype=tf.float32)
            regularizer = tf.contrib.layers.l2_regularizer(0.001)
            tf.contrib.layers.fully_connected(
                inputs, 30, weights_regularizer=regularizer)

        # Evaluation deployment: deliberately no optimizer is passed.
        model = model_deploy.deploy(config, ModelFn)

        # The summary op must have at least one input, and every input is
        # expected to sit on the CPU.
        self.assertTrue(model.summary_op.op.inputs)
        for summary_input in model.summary_op.op.inputs:
            self.assertEqual('/device:CPU:0', summary_input.device)
def _optimize_clone(optimizer, clone, num_clones, regularization_losses,
                    **kwargs):
    """Compute the loss and the gradients for one clone.

    Args:
        optimizer: Object whose `compute_gradients()` is called on the loss.
        clone: Clone whose losses are gathered; its `device` attribute is
            the device scope used for the gradient computation.
        num_clones: The number of clones being deployed.
        regularization_losses: Possibly empty list of regularization losses
            to add to the clone losses.
        **kwargs: Keyword arguments forwarded to `compute_gradients()`.

    Returns:
        A tuple (clone_loss, clone_grads_and_vars).
        - clone_loss: Whatever `_gather_clone_loss()` returned; may be None.
        - clone_grads_and_vars: Result of `compute_gradients()` for that
          loss, or None when there is no loss.
    """
    clone_loss = _gather_clone_loss(clone, num_clones, regularization_losses)
    if clone_loss is None:
        # Nothing to differentiate for this clone.
        return clone_loss, None

    # Gradients are computed under the same device as the clone.
    with tf.device(clone.device):
        grads_and_vars = optimizer.compute_gradients(clone_loss, **kwargs)
    return clone_loss, grads_and_vars
def clone_device(self, clone_index):
    """Return the device on which a clone and all of its ops are created.

    The device string is the concatenation of an optional worker prefix
    (only when PS tasks are in use) and an optional device suffix: the CPU
    when clones run on CPU, otherwise a per-clone GPU when there is more
    than one clone. With a single non-CPU clone and no PS tasks the result
    is the empty string.

    Args:
        clone_index: Int, representing the clone_index.

    Returns:
        A value suitable for `tf.device()`.

    Raises:
        ValueError: if `clone_index` is greater or equal to the number of
            clones.
    """
    if clone_index >= self._num_clones:
        raise ValueError('clone_index must be less than num_clones')

    pieces = []
    if self._num_ps_tasks > 0:
        pieces.append(self._worker_device)

    if self._clone_on_cpu:
        pieces.append('/device:CPU:0')
    elif self._num_clones > 1:
        pieces.append('/device:GPU:%d' % clone_index)

    return ''.join(pieces)
def extract_batch(dataset, config):
    """Build the CPU-side input pipeline and return one shuffled batch op.

    Reads image, boxes and labels (plus the segmentation map when
    ``args.segment`` is set) from ``dataset``, scales the image by 1/255,
    clips and converts the boxes, applies ``data_augmentation`` and encodes
    the ground truth with a ``PriorBoxGrid``.

    Args:
        dataset: dataset handed to the slim ``DatasetDataProvider``.
        config: configuration passed to ``PriorBoxGrid`` and
            ``data_augmentation``.

    Returns:
        The result of ``tf.train.shuffle_batch`` over
        ``[image, inds, refine, cats, segmentation]``.
    """
    with tf.device("/cpu:0"):
        bboxer = PriorBoxGrid(config)
        provider = slim.dataset_data_provider.DatasetDataProvider(
            dataset,
            num_readers=2,
            common_queue_capacity=512,
            common_queue_min=32)

        base_items = ['image', 'object/bbox', 'object/label']
        if args.segment:
            image, boxes, labels, segmentation = provider.get(
                base_items + ['image/segmentation'])
        else:
            image, boxes, labels = provider.get(base_items)
            # No segmentation requested: substitute an all-zero map built
            # from the first two dimensions of the image shape.
            segmentation = tf.expand_dims(tf.zeros(tf.shape(image)[:2]), 2)

        image = tf.to_float(image) / 255
        boxes = yxyx_to_xywh(tf.clip_by_value(boxes, 0.0, 1.0))
        image, boxes, labels, segmentation = data_augmentation(
            image, boxes, labels, segmentation, config)
        inds, cats, refine = bboxer.encode_gt_tf(boxes, labels)

        return tf.train.shuffle_batch(
            [image, inds, refine, cats, segmentation],
            batch_size=args.batch_size,
            capacity=2048,
            min_after_dequeue=64,
            num_threads=4)
def getStatsEigen(self, stats=None):
    """Return the eigen-decomposition variables, creating them on first use.

    When ``self.stats_eigen`` is still empty, one pair of non-trainable
    variables is created on the CPU for every distinct statistics tensor
    found under the ``'fprop_concat_stats'`` and ``'bprop_concat_stats'``
    entries of ``stats``: ``'e'`` (a vector of ones) and ``'Q'`` (a diagonal
    matrix of ones). The mapping is stored in ``self.stats_eigen``.

    Args:
        stats: mapping to read the statistics from; defaults to
            ``self.stats`` when None.

    Returns:
        ``self.stats_eigen``, a dict mapping each statistics tensor to
        ``{'e': ..., 'Q': ...}``.
    """
    if len(self.stats_eigen) != 0:
        # Already built by an earlier call.
        return self.stats_eigen

    if stats is None:
        stats = self.stats

    eigen_by_stat = {}
    with tf.device('/cpu:0'):
        for var in stats:
            for key in ('fprop_concat_stats', 'bprop_concat_stats'):
                for stats_var in stats[var][key]:
                    if stats_var in eigen_by_stat:
                        # The same statistics tensor can be listed more
                        # than once; its variables are created only once.
                        continue
                    stats_dim = stats_var.get_shape()[1].value
                    name_prefix = 'KFAC_FAC/' + stats_var.name.split(':')[0]
                    e = tf.Variable(
                        tf.ones([stats_dim]),
                        name=name_prefix + '/e',
                        trainable=False)
                    Q = tf.Variable(
                        tf.diag(tf.ones([stats_dim])),
                        name=name_prefix + '/Q',
                        trainable=False)
                    eigen_by_stat[stats_var] = {'e': e, 'Q': Q}

    self.stats_eigen = eigen_by_stat
    return self.stats_eigen
def __init__(self, hps, gpu_mode=True, reuse=False):
    """Initializer for the SketchRNN model.

    Stores the hyperparameters and builds the model inside the
    ``'vector_rnn'`` variable scope.

    Args:
        hps: a HParams object containing model hyperparameters.
        gpu_mode: a boolean; when False the model is built under an
            explicit ``'/cpu:0'`` device scope, otherwise no device scope
            is applied.
        reuse: a boolean that, when True, attempts to reuse variables.
    """
    self.hps = hps
    with tf.variable_scope('vector_rnn', reuse=reuse):
        if gpu_mode:
            tf.logging.info('Model using gpu.')
            self.build_model(hps)
        else:
            with tf.device('/cpu:0'):
                tf.logging.info('Model using cpu.')
                self.build_model(hps)
def run():
    """Restore the best checkpoint of a model and evaluate it on a test set.

    Command line: ``<model directory> <test set>``. The process exits with
    status 1 after printing a usage message when fewer than two arguments
    are supplied.
    """
    if len(sys.argv) < 3:
        print("** Usage: python3 " + sys.argv[0] + " <<Model Directory>> <<Test Set>>")
        sys.exit(1)

    np.random.seed(42)
    model_dir = sys.argv[1]
    test_set_path = sys.argv[2]

    config_files = ['./default.conf', os.path.join(model_dir, 'model.conf')]
    config = Config.load(config_files)
    model = create_model(config)
    test_data = load_data(test_set_path, config.dictionary, config.grammar,
                          config.max_length)
    print("unknown", unknown_tokens)

    with tf.Graph().as_default():
        tf.set_random_seed(1234)
        # NOTE(review): the session is kept inside the CPU device scope;
        # confirm this nesting matches the intended structure.
        with tf.device('/cpu:0'):
            model.build()
            test_eval = Seq2SeqEvaluator(
                model, config.grammar, test_data, 'test',
                config.reverse_dictionary,
                beam_size=config.beam_size,
                batch_size=config.batch_size)
            loader = tf.train.Saver()

            with tf.Session() as sess:
                checkpoint_path = os.path.join(model_dir, 'best')
                loader.restore(sess, checkpoint_path)
                #sess = tf_debug.LocalCLIDebugWrapperSession(sess)
                #sess.add_tensor_filter("has_inf_or_nan", tf_debug.has_inf_or_nan)
                test_eval.eval(sess, save_to_file=True)
def collect_results(results_tuple, returns):
    r'''
    Append the results of one ``session.run`` call to the WER report lists.

    ``results_tuple`` holds four lists that are extended in place: the
    original labels, the corresponding decodings, the corresponding
    distances and the corresponding losses. ``returns`` is laid out the same
    way but contains the unprocessed results of a single ``session.run``
    call (effectively one batch), indexed per available device.

    Labels and decodings are converted to text before being appended. For
    decodings only the first available path is used for now.
    '''
    # Every result list grows by one batch per available device.
    for device_index in range(len(available_devices)):
        # Labels, converted to text.
        label_batch = returns[0][device_index]
        results_tuple[0].extend(sparse_tensor_value_to_texts(label_batch))

        # Decodings: only the first path, converted to text.
        first_path = returns[1][device_index][0]
        results_tuple[1].extend(sparse_tensor_value_to_texts(first_path))

        # Distances and losses are appended unprocessed.
        results_tuple[2].extend(returns[2][device_index])
        results_tuple[3].extend(returns[3][device_index])
# For reporting we also need a standard way to do time measurements.
def collect_results(results_tuple, returns):
    r'''
    Append the results of one ``session.run`` call to the WER report lists.

    ``results_tuple`` holds four lists that are extended in place: the
    original labels, the corresponding decodings, the corresponding
    distances and the corresponding losses. ``returns`` is laid out the same
    way but contains the unprocessed results of a single ``session.run``
    call (effectively one batch), indexed per available device.

    Labels and decodings are converted to text before being appended. For
    decodings only the first available path is used for now.
    '''
    # Every result list grows by one batch per available device.
    for device_index in range(len(available_devices)):
        # Labels, converted to text.
        label_batch = returns[0][device_index]
        results_tuple[0].extend(sparse_tensor_value_to_texts(label_batch))

        # Decodings: only the first path, converted to text.
        first_path = returns[1][device_index][0]
        results_tuple[1].extend(sparse_tensor_value_to_texts(first_path))

        # Distances and losses are appended unprocessed.
        results_tuple[2].extend(returns[2][device_index])
        results_tuple[3].extend(returns[3][device_index])
# For reporting we also need a standard way to do time measurements.
def collect_results(results_tuple, returns):
    r'''
    Append the results of one ``session.run`` call to the WER report lists.

    ``results_tuple`` holds four lists that are extended in place: the
    original labels, the corresponding decodings, the corresponding
    distances and the corresponding losses. ``returns`` is laid out the same
    way but contains the unprocessed results of a single ``session.run``
    call (effectively one batch), indexed per available device.

    Labels and decodings are converted to text before being appended. For
    decodings only the first available path is used for now.
    '''
    # Every result list grows by one batch per available device.
    for device_index in range(len(available_devices)):
        # Labels, converted to text.
        label_batch = returns[0][device_index]
        results_tuple[0].extend(sparse_tensor_value_to_texts(label_batch))

        # Decodings: only the first path, converted to text.
        first_path = returns[1][device_index][0]
        results_tuple[1].extend(sparse_tensor_value_to_texts(first_path))

        # Distances and losses are appended unprocessed.
        results_tuple[2].extend(returns[2][device_index])
        results_tuple[3].extend(returns[3][device_index])
# For reporting we also need a standard way to do time measurements.
def __init__(self, mc, gpu_id):
    """Build the complete model graph under the device of one GPU.

    Args:
        mc: forwarded unchanged to ``ModelSkeleton.__init__``
            (presumably the model configuration -- confirm).
        gpu_id: value formatted into the ``'/gpu:{}'`` device string.
    """
    gpu_device = '/gpu:{}'.format(gpu_id)
    with tf.device(gpu_device):
        ModelSkeleton.__init__(self, mc)

        # The graph sections are added one after another in this order.
        self._add_forward_graph()
        self._add_interpretation_graph()
        self._add_loss_graph()
        self._add_train_graph()
        self._add_viz_graph()
def __init__(self, mc, gpu_id):
    """Build the complete model graph under the device of one GPU.

    Args:
        mc: forwarded unchanged to ``ModelSkeleton.__init__``
            (presumably the model configuration -- confirm).
        gpu_id: value formatted into the ``'/gpu:{}'`` device string.
    """
    gpu_device = '/gpu:{}'.format(gpu_id)
    with tf.device(gpu_device):
        ModelSkeleton.__init__(self, mc)

        # The graph sections are added one after another in this order.
        self._add_forward_graph()
        self._add_interpretation_graph()
        self._add_loss_graph()
        self._add_train_graph()
        self._add_viz_graph()
def __init__(self, mc, gpu_id):
    """Build the complete model graph under the device of one GPU.

    Args:
        mc: forwarded unchanged to ``ModelSkeleton.__init__``
            (presumably the model configuration -- confirm).
        gpu_id: value formatted into the ``'/gpu:{}'`` device string.
    """
    gpu_device = '/gpu:{}'.format(gpu_id)
    with tf.device(gpu_device):
        ModelSkeleton.__init__(self, mc)

        # The graph sections are added one after another in this order.
        self._add_forward_graph()
        self._add_interpretation_graph()
        self._add_loss_graph()
        self._add_train_graph()
        self._add_viz_graph()
def __init__(self, mc, gpu_id):
    """Build the complete model graph under the device of one GPU.

    Args:
        mc: forwarded unchanged to ``ModelSkeleton.__init__``
            (presumably the model configuration -- confirm).
        gpu_id: value formatted into the ``'/gpu:{}'`` device string.
    """
    gpu_device = '/gpu:{}'.format(gpu_id)
    with tf.device(gpu_device):
        ModelSkeleton.__init__(self, mc)

        # The graph sections are added one after another in this order.
        self._add_forward_graph()
        self._add_interpretation_graph()
        self._add_loss_graph()
        self._add_train_graph()
        self._add_viz_graph()
def get_variable(name, shape, initializer=None, dtype=tf.float32, device=None):
    """
    Helper to create a Variable pinned to a device (the CPU by default).

    Args:
        name: name of the variable
        shape: list of ints
        initializer: initializer for Variable; when None, no initializer
            argument is passed to `tf.get_variable`
        dtype: data type, defaults to tf.float32
        device: device to which the variable will be pinned; '/cpu:0' is
            used when None

    Returns:
        Variable Tensor
    """
    target_device = '/cpu:0' if device is None else device

    # Forward `initializer` only when the caller actually supplied one.
    optional_args = {}
    if initializer is not None:
        optional_args['initializer'] = initializer

    with tf.device(target_device):
        return tf.get_variable(name, shape, dtype=dtype, **optional_args)