def feed(self, batch):
    """
    Build the feed dict that binds one batch to the model's placeholders.

    The always-present placeholders are keyed by their ``name``; the
    optional sparse logits mask, the learning rate and the optional
    sub one-hot targets are keyed by the placeholder object itself.

    :param batch: a Batch object
    :return: feed dict of inputs
    """
    feed_dict = {
        self.inputs_.name: batch.inputs,
        self.targets_.name: batch.targets,
        self.mask_.name: batch.masks,
        self.dests_label_.name: batch.dests,
        self.seq_len_.name: batch.seq_lens,
    }

    if self.logits_mask__ is not None:
        # Every listed adjacency index carries a weight of 1.0.
        ones = np.ones(len(batch.adj_indices), np.float32)
        dense_shape = np.array(
            [np.size(batch.inputs), self.config.state_size], dtype=np.int32)
        feed_dict[self.logits_mask__] = tf.SparseTensorValue(
            batch.adj_indices, ones, dense_shape)

    feed_dict[self.lr_] = self.config.lr

    if self.sub_onehot_targets_ is not None:
        feed_dict[self.sub_onehot_targets_] = batch.sub_onehot_target

    return feed_dict
python类SparseTensorValue()的实例源码
def pull_batch(query_data, doc_data, batch_idx):
    """Slice one mini-batch and convert it to feedable sparse values.

    Rows ``[batch_idx * BS, (batch_idx + 1) * BS)`` are taken from both
    matrices (``BS`` is a module-level batch size defined outside this
    function). Both slices are then restricted to the columns that are
    non-empty in at least one of them.

    Args:
        query_data: row-sliceable sparse matrix exposing ``tocoo()``
            (presumably a scipy.sparse matrix -- confirm with the caller).
        doc_data: same kind of matrix as ``query_data``.
        batch_idx: zero-based index of the batch to extract.

    Returns:
        ``(query_in, doc_in, cols)``: two ``tf.SparseTensorValue`` objects
        and the sorted array of retained column ids.
    """
    def _as_sparse_value(coo):
        # np.float was removed in NumPy 1.24; float64 is what it aliased.
        return tf.SparseTensorValue(
            np.transpose([np.array(coo.row, dtype=np.int64),
                          np.array(coo.col, dtype=np.int64)]),
            np.array(coo.data, dtype=np.float64),
            np.array(coo.shape, dtype=np.int64))

    rows = slice(batch_idx * BS, (batch_idx + 1) * BS)
    query_in = query_data[rows, :]
    doc_in = doc_data[rows, :]
    # Union of the column ids used by either slice.
    cols = np.unique(np.concatenate((query_in.tocoo().col.T, doc_in.tocoo().col.T), axis=0))
    query_in = query_in[:, cols].tocoo()
    doc_in = doc_in[:, cols].tocoo()
    return _as_sparse_value(query_in), _as_sparse_value(doc_in), cols
def pull_batch(query_data, doc_data, batch_idx):
    """Slice one mini-batch and convert it to feedable sparse values.

    Rows ``[batch_idx * BS, (batch_idx + 1) * BS)`` are taken from both
    matrices (``BS`` is a module-level batch size defined outside this
    function) and converted from COO form.

    Args:
        query_data: row-sliceable sparse matrix exposing ``tocoo()``
            (presumably a scipy.sparse matrix -- confirm with the caller).
        doc_data: same kind of matrix as ``query_data``.
        batch_idx: zero-based index of the batch to extract.

    Returns:
        ``(query_in, doc_in)`` as two ``tf.SparseTensorValue`` objects.
    """
    def _as_sparse_value(coo):
        # np.float was removed in NumPy 1.24; float64 is what it aliased.
        return tf.SparseTensorValue(
            np.transpose([np.array(coo.row, dtype=np.int64),
                          np.array(coo.col, dtype=np.int64)]),
            np.array(coo.data, dtype=np.float64),
            np.array(coo.shape, dtype=np.int64))

    rows = slice(batch_idx * BS, (batch_idx + 1) * BS)
    query_in = query_data[rows, :].tocoo()
    doc_in = doc_data[rows, :].tocoo()
    return _as_sparse_value(query_in), _as_sparse_value(doc_in)
def pull_batch(query_data, doc_data, batch_idx):
    """Slice one mini-batch and convert it to feedable sparse values.

    Rows ``[batch_idx * BS, (batch_idx + 1) * BS)`` are taken from both
    matrices (``BS`` is a module-level batch size defined outside this
    function). The shape of each slice's value array is printed before
    conversion.

    Args:
        query_data: row-sliceable sparse matrix exposing ``tocoo()``
            (presumably a scipy.sparse matrix -- confirm with the caller).
        doc_data: same kind of matrix as ``query_data``.
        batch_idx: zero-based index of the batch to extract.

    Returns:
        ``(query_in, doc_in)`` as two ``tf.SparseTensorValue`` objects.
    """
    def _as_sparse_value(coo):
        # np.float was removed in NumPy 1.24; float64 is what it aliased.
        return tf.SparseTensorValue(
            np.transpose([np.array(coo.row, dtype=np.int64),
                          np.array(coo.col, dtype=np.int64)]),
            np.array(coo.data, dtype=np.float64),
            np.array(coo.shape, dtype=np.int64))

    rows = slice(batch_idx * BS, (batch_idx + 1) * BS)
    query_in = query_data[rows, :].tocoo()
    doc_in = doc_data[rows, :].tocoo()
    # NOTE(review): looks like leftover debug output -- confirm it is wanted.
    print(query_in.data.shape)
    print(doc_in.data.shape)
    return _as_sparse_value(query_in), _as_sparse_value(doc_in)
def pull_batch(query_data, doc_data, batch_idx):
    """Slice one mini-batch and convert it to feedable sparse values.

    Rows ``[batch_idx * BS, (batch_idx + 1) * BS)`` are taken from both
    matrices (``BS`` is a module-level batch size defined outside this
    function). Both slices are then restricted to the columns that are
    non-empty in at least one of them.

    Args:
        query_data: row-sliceable sparse matrix exposing ``tocoo()``
            (presumably a scipy.sparse matrix -- confirm with the caller).
        doc_data: same kind of matrix as ``query_data``.
        batch_idx: zero-based index of the batch to extract.

    Returns:
        ``(query_in, doc_in, cols)``: two ``tf.SparseTensorValue`` objects
        and the sorted array of retained column ids.
    """
    def _as_sparse_value(coo):
        # np.float was removed in NumPy 1.24; float64 is what it aliased.
        return tf.SparseTensorValue(
            np.transpose([np.array(coo.row, dtype=np.int64),
                          np.array(coo.col, dtype=np.int64)]),
            np.array(coo.data, dtype=np.float64),
            np.array(coo.shape, dtype=np.int64))

    rows = slice(batch_idx * BS, (batch_idx + 1) * BS)
    query_in = query_data[rows, :]
    doc_in = doc_data[rows, :]
    # Union of the column ids used by either slice.
    cols = np.unique(np.concatenate((query_in.tocoo().col.T, doc_in.tocoo().col.T), axis=0))
    query_in = query_in[:, cols].tocoo()
    doc_in = doc_in[:, cols].tocoo()
    return _as_sparse_value(query_in), _as_sparse_value(doc_in), cols
def pull_batch(query_data, doc_data, batch_idx):
    """Slice one mini-batch and convert it to feedable sparse values.

    Rows ``[batch_idx * BS, (batch_idx + 1) * BS)`` are taken from both
    matrices (``BS`` is a module-level batch size defined outside this
    function) and converted from COO form. For the first batch, row 53 of
    the query slice is printed.

    Args:
        query_data: row-sliceable sparse matrix exposing ``tocoo()`` and
            ``getrow()`` (presumably a scipy.sparse matrix -- confirm with
            the caller).
        doc_data: same kind of matrix as ``query_data``.
        batch_idx: zero-based index of the batch to extract.

    Returns:
        ``(query_in, doc_in)`` as two ``tf.SparseTensorValue`` objects.
    """
    def _as_sparse_value(coo):
        # np.float was removed in NumPy 1.24; float64 is what it aliased.
        return tf.SparseTensorValue(
            np.transpose([np.array(coo.row, dtype=np.int64),
                          np.array(coo.col, dtype=np.int64)]),
            np.array(coo.data, dtype=np.float64),
            np.array(coo.shape, dtype=np.int64))

    rows = slice(batch_idx * BS, (batch_idx + 1) * BS)
    query_in = query_data[rows, :]
    doc_in = doc_data[rows, :]
    if batch_idx == 0:
        # NOTE(review): looks like leftover debug output; presumably fails
        # when the first batch has fewer than 54 rows -- confirm/remove.
        print(query_in.getrow(53))
    query_in = query_in.tocoo()
    doc_in = doc_in.tocoo()
    return _as_sparse_value(query_in), _as_sparse_value(doc_in)
def _get_labels_feed_item(label_list, max_time):
    """
    Generate the tensor from 'label_list' to feed as labels into the network

    Args:
        label_list: a list of encoded labels (each one a sequence of ints)
        max_time: the maximum time length of `label_list`

    Returns: the SparseTensorValue to feed into the network, with one entry
        per identifier at index ``[label position, identifier position]``
        and dense shape ``[len(label_list), max_time]``
    """
    # np.int was removed in NumPy 1.24; the builtin int is what it aliased.
    label_shape = np.array([len(label_list), max_time], dtype=int)

    label_indices = []
    label_values = []
    for label_idx, label in enumerate(label_list):
        for id_idx, identifier in enumerate(label):
            label_indices.append([label_idx, id_idx])
            label_values.append(identifier)

    # reshape keeps the index matrix two-dimensional even when there are no
    # identifiers at all (np.array([]) alone would have shape (0,)).
    label_indices = np.array(label_indices, dtype=int).reshape(-1, 2)
    label_values = np.array(label_values, dtype=int)

    return tf.SparseTensorValue(label_indices, label_values, label_shape)
def testMakeOutputDictError(self):
    """Check that make_output_dict raises ValueError on bad sparse fetches."""
    schema = self.toSchema({'a': tf.VarLenFeature(tf.string)})

    # Case 1: a SparseTensor that cannot be represented as VarLenFeature.
    not_varlen = tf.SparseTensorValue(
        indices=np.array([(0, 2), (0, 4), (0, 8)]),
        values=np.array([10.0, 20.0, 30.0]),
        dense_shape=(1, 20))
    with self.assertRaisesRegexp(
            ValueError, 'cannot be decoded by ListColumnRepresentation'):
        _ = impl_helper.make_output_dict(schema, {'a': not_varlen})

    # Case 2: a SparseTensor of invalid rank.
    bad_rank = tf.SparseTensorValue(
        indices=np.array([(0, 0, 1), (0, 0, 2), (0, 0, 3)]),
        values=np.array([10.0, 20.0, 30.0]),
        dense_shape=(1, 10, 10))
    with self.assertRaisesRegexp(
            ValueError, 'cannot be decoded by ListColumnRepresentation'):
        _ = impl_helper.make_output_dict(schema, {'a': bad_rank})

    # Case 3: a SparseTensor whose indices are out of order.
    unordered = tf.SparseTensorValue(
        indices=np.array([(0, 2), (2, 4), (1, 8)]),
        values=np.array([10.0, 20.0, 30.0]),
        dense_shape=(3, 20))
    with self.assertRaisesRegexp(
            ValueError, 'Encountered out-of-order sparse index'):
        _ = impl_helper.make_output_dict(schema, {'a': unordered})
address_matching.py 文件源码
项目:TensorFlow-Machine-Learning-Cookbook
作者: PacktPublishing
项目源码
文件源码
阅读 37
收藏 0
点赞 0
评论 0
def sparse_from_word_vec(word_vec):
    """Pack a sequence of words into one character-level SparseTensorValue.

    Each character is stored at index ``[word position, 0, char position]``;
    the dense shape passed along is ``[number of words, 1, 1]``.
    """
    char_indices = []
    for word_idx, word in enumerate(word_vec):
        for char_idx, _ in enumerate(word):
            char_indices.append([word_idx, 0, char_idx])
    # Joining first means the values are the individual characters in order.
    all_chars = list(''.join(word_vec))
    dense_shape = [len(word_vec), 1, 1]
    return tf.SparseTensorValue(char_indices, all_chars, dense_shape)
# Loop through test indices
text_distances.py 文件源码
项目:TensorFlow-Machine-Learning-Cookbook
作者: PacktPublishing
项目源码
文件源码
阅读 31
收藏 0
点赞 0
评论 0
def create_sparse_vec(word_list):
    """Pack a list of words into one character-level SparseTensorValue.

    Each character is stored at index ``[word position, 0, char position]``;
    the dense shape passed along is ``[number of words, 1, 1]``.
    """
    char_indices = []
    for word_idx, word in enumerate(word_list):
        for char_idx, _ in enumerate(word):
            char_indices.append([word_idx, 0, char_idx])
    # Joining first means the values are the individual characters in order.
    all_chars = list(''.join(word_list))
    dense_shape = [len(word_list), 1, 1]
    return tf.SparseTensorValue(char_indices, all_chars, dense_shape)
sparsetensor.py 文件源码
项目:tensorflow_end2end_speech_recognition
作者: hirofumi0810
项目源码
文件源码
阅读 26
收藏 0
点赞 0
评论 0
def sparsetensor2list(labels_st, batch_size):
    """Convert labels from sparse tensor to list.

    Values are grouped by the row id stored in the first index column, so an
    utterance without any label yields an empty sequence instead of raising
    or shifting the following utterances.

    Args:
        labels_st: A SparseTensorValue of labels, or a sequence
            ``[indices, values, shape]``. Indices are assumed to be in
            row-major order -- TODO confirm for hand-built inputs.
        batch_size (int): the size of mini-batch
    Returns:
        labels (list): list of np.ndarray, size of `[B]`. Each element is a
            sequence of target labels of an input. When ``batch_size`` is 1
            the values are returned as a single array reshaped to
            ``(1, -1)`` instead.
    """
    if isinstance(labels_st, tf.SparseTensorValue):
        # Output of TensorFlow
        indices = labels_st.indices
        values = labels_st.values
    else:
        # labels_st is expected to be a list [indices, values, shape]
        indices = labels_st[0]
        values = labels_st[1]

    if batch_size == 1:
        return values.reshape((1, -1))

    indices = np.asarray(indices)
    # With no entries at all the index array may not be two-dimensional.
    row_ids = indices[:, 0] if indices.size else indices.reshape(-1)

    # bounds[b] is the position of the first value belonging to row b;
    # rows without labels get bounds[b] == bounds[b + 1].
    bounds = np.searchsorted(row_ids, np.arange(batch_size + 1))
    return [values[bounds[b]:bounds[b + 1]] for b in range(batch_size)]
def __init__(self, sess, n, filename, jump_prob=0.05, epsilon=1e-4, max_iteration=100, drop_tol=1e-8, verbose=False):
    """
    Set up the iterative PPR solver and build its TensorFlow ops.

    Args:
        sess (Session): tensorflow session.
        n (int): Number of nodes.
        filename (str): A csv file denoting the graph.
        jump_prob (float): Jumping probability of PPR.
        epsilon (float): Convergence threshold (uses l2-norm of difference).
        max_iteration (int): Maximum number of allowed iterations.
        drop_tol (float): No effect.
        verbose (bool): Prints step messages if True.
    """
    self.alias = 'iter'
    self.verbose = verbose
    self.pp("initializing")

    self.sess = sess
    self.n = n
    self.c = jump_prob
    self.e = epsilon
    self.max_iteration = max_iteration

    damping = 1 - self.c

    self.pp("preprocessing")
    self.node2index, adjacency = read_matrix(filename, d=damping)

    self.pp("tf init")
    with tf.variable_scope('ppr_iterative_tf'):
        sparse_adjacency = tf.SparseTensorValue(
            list(zip(adjacency.row, adjacency.col)),
            adjacency.data,
            dense_shape=[n, n])
        # Both score vectors start as the uniform column vector 1/n.
        previous_scores = tf.Variable((np.ones(n) / n)[:, np.newaxis])
        self.t_cq = tf.placeholder(tf.float64, shape=[n, 1])
        self.t_new_r = tf.Variable((np.ones(n) / n)[:, np.newaxis])

        propagated = _sdmm(sparse_adjacency, previous_scores) + self.t_cq
        self.t_new_r_assign = tf.assign(self.t_new_r, propagated)
        self.t_old_r_assign = tf.assign(previous_scores, self.t_new_r)
        self.t_loss = tf.norm(self.t_new_r - previous_scores)

    del adjacency
def _binary_2d_label_to_sparse_value(labels):
    """Turn a dense 2D 0/1 indicator structure into a sparse value.

    Only entries equal to 1 are kept; any other entry must equal 0.

    Args:
      labels: Dense 2D binary indicator tensor.

    Returns:
      `SparseTensorValue` whose values are indices along the last dimension
      of `labels`. Within a row, kept entries are numbered consecutively
      from 0 in the second index column.
    """
    sparse_indices = []
    class_ids = []
    for row_idx, row in enumerate(labels):
        slot = 0  # position of the next kept entry within this row
        for class_id, flag in enumerate(row):
            if flag == 1:
                sparse_indices.append([row_idx, slot])
                class_ids.append(class_id)
                slot += 1
            else:
                assert flag == 0
    dense_shape = [len(labels), len(labels[0])]
    return tf.SparseTensorValue(
        np.array(sparse_indices, np.int64),
        np.array(class_ids, np.int64),
        np.array(dense_shape, np.int64))
def _binary_3d_label_to_sparse_value(labels):
    """Turn a dense 3D 0/1 indicator structure into a sparse value.

    Only entries equal to 1 are kept; any other entry must equal 0.

    Args:
      labels: Dense 3D binary indicator tensor.

    Returns:
      `SparseTensorValue` whose values are indices along the last dimension
      of `labels`. Within each innermost row, kept entries are numbered
      consecutively from 0 in the third index column.
    """
    sparse_indices = []
    class_ids = []
    for outer_idx, plane in enumerate(labels):
        for inner_idx, row in enumerate(plane):
            slot = 0  # position of the next kept entry within this row
            for class_id, flag in enumerate(row):
                if flag == 1:
                    class_ids.append(class_id)
                    sparse_indices.append([outer_idx, inner_idx, slot])
                    slot += 1
                else:
                    assert flag == 0
    dense_shape = [len(labels), len(labels[0]), len(labels[0][0])]
    return tf.SparseTensorValue(
        np.array(sparse_indices, np.int64),
        np.array(class_ids, np.int64),
        np.array(dense_shape, np.int64))
def testNumRelevantSparse(self):
    """Check num_relevant on a 3x3x3 sparse label value for several k."""
    with self.test_session():
        labels = tf.SparseTensorValue(
            indices=(
                (0, 0, 0), (0, 0, 1),
                (0, 1, 0), (0, 1, 1), (0, 1, 2),
                # (0, 2) missing
                (1, 0, 0), (1, 0, 1), (1, 0, 2),
                (1, 1, 0),
                (1, 2, 0),
                # (2, 0) missing
                (2, 1, 0), (2, 1, 1),
                (2, 2, 0)),
            values=(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13),
            shape=(3, 3, 3))
        # Number of labels present in each (d0, d1) cell.
        label_lengths = ((2, 3, 0), (3, 1, 1), (0, 2, 1))
        expected_by_k = (
            (1, ((1, 1, 0), (1, 1, 1), (0, 1, 1))),
            (2, ((2, 2, 0), (2, 1, 1), (0, 2, 1))),
            (3, label_lengths),
            (999, label_lengths),
        )
        for k, expected in expected_by_k:
            self.assertAllEqual(
                expected, metric_ops.num_relevant(labels, k=k).eval())
def testSparseExpandAndTileInvalidArgs(self):
    """Check that expand_and_tile rejects bad `multiple` and `dim` values."""
    all_cells = [
        (i, j, k) for i in range(3) for j in range(3) for k in range(3)]
    x = tf.SparseTensorValue(
        indices=all_cells, values=[1] * 27, shape=[3, 3, 3])

    # A zero multiple is rejected when the op is built.
    with self.assertRaisesRegexp(ValueError, 'nvalid multiple'):
        metric_ops.expand_and_tile(x, multiple=0)

    with self.test_session():
        with self.assertRaises(tf.OpError):
            metric_ops.expand_and_tile(x, multiple=1, dim=-4).eval()
        with self.assertRaises(ValueError):
            metric_ops.expand_and_tile(x, multiple=1, dim=4).eval()
def testSparseExpandAndTile1x(self):
    """Check expand_and_tile with multiple=1 for every insertion dim."""
    # Shape (3,3).
    x = tf.SparseTensorValue(
        indices=[[0, 0], [0, 1], [1, 0], [1, 1], [1, 2], [2, 0]],
        values=[1, 2, 3, 4, 5, 6],
        shape=[3, 3])

    with self.test_session():
        # New axis in front: default dim, then dim -2 and 0.
        expected_result_dim0 = tf.SparseTensorValue(
            indices=[[0, row, col] for row, col in x.indices],
            values=x.values,
            shape=[1, 3, 3])
        self._assert_sparse_tensors_equal(
            expected_result_dim0,
            metric_ops.expand_and_tile(x, multiple=1).eval())
        for dim in (-2, 0):
            self._assert_sparse_tensors_equal(
                expected_result_dim0,
                metric_ops.expand_and_tile(x, multiple=1, dim=dim).eval())

        # New axis in the middle: dim -1 and 1.
        expected_result_dim1 = tf.SparseTensorValue(
            indices=[[row, 0, col] for row, col in x.indices],
            values=x.values,
            shape=[3, 1, 3])
        for dim in (-1, 1):
            self._assert_sparse_tensors_equal(
                expected_result_dim1,
                metric_ops.expand_and_tile(x, multiple=1, dim=dim).eval())

        # New axis at the end: dim 2.
        expected_result_dim2 = tf.SparseTensorValue(
            indices=[[row, col, 0] for row, col in x.indices],
            values=x.values,
            shape=[3, 3, 1])
        self._assert_sparse_tensors_equal(
            expected_result_dim2,
            metric_ops.expand_and_tile(x, multiple=1, dim=2).eval())
# TODO(ptucker): Use @parameterized when it's available in tf.
def _binary_2d_label_to_sparse_value(labels):
    """Turn a dense 2D 0/1 indicator structure into a sparse value.

    Only entries equal to 1 are kept; any other entry must equal 0.

    Args:
      labels: Dense 2D binary indicator tensor.

    Returns:
      `SparseTensorValue` whose values are indices along the last dimension
      of `labels`. Within a row, kept entries are numbered consecutively
      from 0 in the second index column.
    """
    sparse_indices = []
    class_ids = []
    for row_idx, row in enumerate(labels):
        slot = 0  # position of the next kept entry within this row
        for class_id, flag in enumerate(row):
            if flag == 1:
                sparse_indices.append([row_idx, slot])
                class_ids.append(class_id)
                slot += 1
            else:
                assert flag == 0
    dense_shape = [len(labels), len(labels[0])]
    return tf.SparseTensorValue(
        np.array(sparse_indices, np.int64),
        np.array(class_ids, np.int64),
        np.array(dense_shape, np.int64))
def _binary_3d_label_to_sparse_value(labels):
    """Turn a dense 3D 0/1 indicator structure into a sparse value.

    Only entries equal to 1 are kept; any other entry must equal 0.

    Args:
      labels: Dense 3D binary indicator tensor.

    Returns:
      `SparseTensorValue` whose values are indices along the last dimension
      of `labels`. Within each innermost row, kept entries are numbered
      consecutively from 0 in the third index column.
    """
    sparse_indices = []
    class_ids = []
    for outer_idx, plane in enumerate(labels):
        for inner_idx, row in enumerate(plane):
            slot = 0  # position of the next kept entry within this row
            for class_id, flag in enumerate(row):
                if flag == 1:
                    class_ids.append(class_id)
                    sparse_indices.append([outer_idx, inner_idx, slot])
                    slot += 1
                else:
                    assert flag == 0
    dense_shape = [len(labels), len(labels[0]), len(labels[0][0])]
    return tf.SparseTensorValue(
        np.array(sparse_indices, np.int64),
        np.array(class_ids, np.int64),
        np.array(dense_shape, np.int64))
def test_top_k_rank_invalid(self):
    """Check that rank < 2 top-k predictions raise ValueError."""
    with self.test_session():
        # top_k_predictions has rank < 2.
        top_k_predictions = [9, 4, 6, 2, 0]
        label_indices = np.array([[0,], [1,], [2,]], np.int64)
        label_values = np.array([2, 7, 8], np.int64)
        label_shape = np.array([10,], np.int64)
        sp_labels = tf.SparseTensorValue(
            indices=label_indices, values=label_values, shape=label_shape)

        with self.assertRaises(ValueError):
            predictions_t = tf.constant(top_k_predictions, tf.int64)
            precision, _ = metrics.streaming_sparse_precision_at_top_k(
                top_k_predictions=predictions_t, labels=sp_labels)
            tf.initialize_variables(tf.local_variables()).run()
            precision.eval()
def test_three_labels_at_k5_some_out_of_range(self):
    """Tests that labels outside the [0, n_classes) range are ignored."""
    predictions = [
        [0.5, 0.1, 0.6, 0.3, 0.8, 0.0, 0.7, 0.2, 0.4, 0.9],
        [0.3, 0.0, 0.7, 0.2, 0.4, 0.9, 0.5, 0.8, 0.1, 0.6],
    ]
    top_k_predictions = [
        [9, 4, 6, 2, 0],
        [5, 7, 2, 9, 6],
    ]
    sp_labels = tf.SparseTensorValue(
        indices=[[0, 0], [0, 1], [0, 2], [0, 3],
                 [1, 0], [1, 1], [1, 2], [1, 3]],
        # values -1 and 10 are outside the [0, n_classes) range and are
        # ignored.
        values=np.array([2, 7, -1, 8,
                         1, 2, 5, 10], np.int64),
        shape=[2, 4])

    per_class_cases = (
        (2, 2.0 / 2),  # Class 2: 2 labels, 2 correct predictions.
        (5, 1.0 / 1),  # Class 5: 1 label, 1 correct prediction.
        (7, 0.0 / 1),  # Class 7: 1 label, 1 incorrect prediction.
    )
    for class_id, expected in per_class_cases:
        self._test_streaming_sparse_precision_at_k(
            predictions, sp_labels, k=5, expected=expected,
            class_id=class_id)
        self._test_streaming_sparse_precision_at_top_k(
            top_k_predictions, sp_labels, expected=expected,
            class_id=class_id)

    # All classes: 10 predictions, 3 correct.
    self._test_streaming_sparse_precision_at_k(
        predictions, sp_labels, k=5, expected=3.0 / 10)
    self._test_streaming_sparse_precision_at_top_k(
        top_k_predictions, sp_labels, expected=3.0 / 10)
def test_three_labels_at_k5_some_out_of_range(self):
    """Tests that labels outside the [0, n_classes) count in denominator."""
    predictions = [
        [0.5, 0.1, 0.6, 0.3, 0.8, 0.0, 0.7, 0.2, 0.4, 0.9],
        [0.3, 0.0, 0.7, 0.2, 0.4, 0.9, 0.5, 0.8, 0.1, 0.6],
    ]
    sp_labels = tf.SparseTensorValue(
        indices=[[0, 0], [0, 1], [0, 2], [0, 3],
                 [1, 0], [1, 1], [1, 2], [1, 3]],
        # values -1 and 10 are outside the [0, n_classes) range.
        values=np.array([2, 7, -1, 8,
                         1, 2, 5, 10], np.int64),
        shape=[2, 4])

    per_class_cases = (
        (2, 2.0 / 2),  # Class 2: 2 labels, both correct.
        (5, 1.0 / 1),  # Class 5: 1 label, correct.
        (7, 0.0 / 1),  # Class 7: 1 label, incorrect.
    )
    for class_id, expected in per_class_cases:
        self._test_streaming_sparse_recall_at_k(
            predictions=predictions, labels=sp_labels, k=5,
            expected=expected, class_id=class_id)

    # All classes: 8 labels, 3 correct.
    self._test_streaming_sparse_recall_at_k(
        predictions=predictions, labels=sp_labels, k=5, expected=3.0 / 8)
def testSparseExpandAndTileInvalidArgs(self):
    """Check that expand_and_tile rejects bad `multiple` and `dim` values."""
    all_cells = [
        (i, j, k) for i in range(3) for j in range(3) for k in range(3)]
    x = tf.SparseTensorValue(
        indices=all_cells, values=[1] * 27, shape=[3, 3, 3])

    # A zero multiple is rejected when the op is built.
    with self.assertRaisesRegexp(ValueError, 'nvalid multiple'):
        metric_ops.expand_and_tile(x, multiple=0)

    with self.test_session():
        with self.assertRaises(tf.OpError):
            metric_ops.expand_and_tile(x, multiple=1, dim=-4).eval()
        with self.assertRaises(ValueError):
            metric_ops.expand_and_tile(x, multiple=1, dim=4).eval()
def testSparseExpandAndTile1x(self):
    """Check expand_and_tile with multiple=1 for every insertion dim."""
    # Shape (3,3).
    x = tf.SparseTensorValue(
        indices=[[0, 0], [0, 1], [1, 0], [1, 1], [1, 2], [2, 0]],
        values=[1, 2, 3, 4, 5, 6],
        shape=[3, 3])

    with self.test_session():
        # New axis in front: default dim, then dim -2 and 0.
        expected_result_dim0 = tf.SparseTensorValue(
            indices=[[0, row, col] for row, col in x.indices],
            values=x.values,
            shape=[1, 3, 3])
        self._assert_sparse_tensors_equal(
            expected_result_dim0,
            metric_ops.expand_and_tile(x, multiple=1).eval())
        for dim in (-2, 0):
            self._assert_sparse_tensors_equal(
                expected_result_dim0,
                metric_ops.expand_and_tile(x, multiple=1, dim=dim).eval())

        # New axis in the middle: dim -1 and 1.
        expected_result_dim1 = tf.SparseTensorValue(
            indices=[[row, 0, col] for row, col in x.indices],
            values=x.values,
            shape=[3, 1, 3])
        for dim in (-1, 1):
            self._assert_sparse_tensors_equal(
                expected_result_dim1,
                metric_ops.expand_and_tile(x, multiple=1, dim=dim).eval())

        # New axis at the end: dim 2.
        expected_result_dim2 = tf.SparseTensorValue(
            indices=[[row, col, 0] for row, col in x.indices],
            values=x.values,
            shape=[3, 3, 1])
        self._assert_sparse_tensors_equal(
            expected_result_dim2,
            metric_ops.expand_and_tile(x, multiple=1, dim=2).eval())
# TODO(ptucker): Use @parameterized when it's available in tf.
def feed_dict(self, dataset: Dataset, train: bool = False) -> FeedDict:
    """Build the feed dict for this object's placeholders.

    The train-mode flag is always fed. When the dataset provides the
    ``self.data_id`` series, the target sentences are additionally fed as
    a sparse value made of the positions whose padding weight exceeds 0.5.

    Args:
        dataset: dataset queried with ``get_series(self.data_id,
            allow_none=True)``.
        train: value fed to ``self.train_mode`` and forwarded to the
            vocabulary as ``train_mode``.

    Returns:
        The populated feed dict.
    """
    feed = {}  # type: FeedDict

    target_series = cast(Iterable[List[str]],
                         dataset.get_series(self.data_id, allow_none=True))

    feed[self.train_mode] = train

    if target_series is None:
        return feed

    token_ids, weights = self.vocabulary.sentences_to_tensor(
        list(target_series), train_mode=train)

    # sentences_to_tensor returns time-major tensors, targets need to
    # be batch-major
    token_ids = token_ids.T
    weights = weights.T

    # Keep only the non-padded positions to get a sparse representation.
    keep = (weights > 0.5)
    feed[self.train_targets] = tf.SparseTensorValue(
        indices=np.stack(np.where(keep), axis=1),
        values=token_ids[keep],
        dense_shape=token_ids.shape)

    return feed
def feed_dict(self, dataset: Dataset, train: bool = False) -> FeedDict:
    """Build the feed dict for this object's placeholders.

    The train-mode flag is always fed. When the dataset provides the
    ``self.data_id`` series, the target sentences are additionally fed as
    a sparse value made of the positions whose padding weight exceeds 0.5.

    Args:
        dataset: dataset queried with ``get_series(self.data_id,
            allow_none=True)``.
        train: value fed to ``self.train_mode`` and forwarded to the
            vocabulary as ``train_mode``.

    Returns:
        The populated feed dict.
    """
    feed = {}  # type: FeedDict

    target_series = cast(Iterable[List[str]],
                         dataset.get_series(self.data_id, allow_none=True))

    feed[self.train_mode] = train

    if target_series is None:
        return feed

    token_ids, weights = self.vocabulary.sentences_to_tensor(
        list(target_series), train_mode=train)

    # sentences_to_tensor returns time-major tensors, targets need to
    # be batch-major
    token_ids = token_ids.T
    weights = weights.T

    # Keep only the non-padded positions to get a sparse representation.
    keep = (weights > 0.5)
    feed[self.train_targets] = tf.SparseTensorValue(
        indices=np.stack(np.where(keep), axis=1),
        values=token_ids[keep],
        dense_shape=token_ids.shape)

    return feed
def feed_dict(self, dataset: Dataset, train: bool = False) -> FeedDict:
    """Build the feed dict for this object's placeholders.

    The train-mode flag is always fed. When the dataset provides the
    ``self.data_id`` series, the target sentences are additionally fed as
    a sparse value made of the positions whose padding weight exceeds 0.5.

    Args:
        dataset: dataset queried with ``get_series(self.data_id,
            allow_none=True)``.
        train: value fed to ``self.train_mode`` and forwarded to the
            vocabulary as ``train_mode``.

    Returns:
        The populated feed dict.
    """
    feed = {}  # type: FeedDict

    target_series = cast(Iterable[List[str]],
                         dataset.get_series(self.data_id, allow_none=True))

    feed[self.train_mode] = train

    if target_series is None:
        return feed

    token_ids, weights = self.vocabulary.sentences_to_tensor(
        list(target_series), train_mode=train)

    # sentences_to_tensor returns time-major tensors, targets need to
    # be batch-major
    token_ids = token_ids.T
    weights = weights.T

    # Keep only the non-padded positions to get a sparse representation.
    keep = (weights > 0.5)
    feed[self.train_targets] = tf.SparseTensorValue(
        indices=np.stack(np.where(keep), axis=1),
        values=token_ids[keep],
        dense_shape=token_ids.shape)

    return feed
def testMakeOutputDict(self):
    """Check make_output_dict on dense, var-len and sparse feature fetches."""
    schema = self.toSchema({
        'a': tf.FixedLenFeature(None, tf.int64),
        'b': tf.FixedLenFeature([], tf.float32),
        'c': tf.FixedLenFeature([1], tf.float32),
        'd': tf.FixedLenFeature([2, 2], tf.float32),
        'e': tf.VarLenFeature(tf.string),
        'f': tf.SparseFeature('idx', 'val', tf.float32, 10)
    })

    varlen_strings = tf.SparseTensorValue(
        indices=np.array([(0, 0), (0, 1), (0, 2), (2, 0), (2, 1), (2, 2)]),
        values=np.array(['doe', 'a', 'deer', 'a', 'female', 'deer']),
        dense_shape=(3, 3))
    sparse_floats = tf.SparseTensorValue(
        indices=np.array([(0, 2), (0, 4), (0, 8), (1, 8), (1, 4)]),
        values=np.array([10.0, 20.0, 30.0, 40.0, 50.0]),
        dense_shape=(3, 20))
    fetches = {
        'a': np.array([100, 200, 300]),
        'b': np.array([10.0, 20.0, 30.0]),
        'c': np.array([[40.0], [80.0], [120.0]]),
        'd': np.array([[[1.0, 2.0], [3.0, 4.0]],
                       [[5.0, 6.0], [7.0, 8.0]],
                       [[9.0, 10.0], [11.0, 12.0]]]),
        'e': varlen_strings,
        'f': sparse_floats,
    }

    output_dict = impl_helper.make_output_dict(schema, fetches)

    self.assertSetEqual(set(six.iterkeys(output_dict)),
                        set(['a', 'b', 'c', 'd', 'e', 'f']))

    # Dense features are expected back unchanged.
    expected_dense = (
        ('a', [100, 200, 300]),
        ('b', [10.0, 20.0, 30.0]),
        ('c', [[40.0], [80.0], [120.0]]),
        ('d', [[[1.0, 2.0], [3.0, 4.0]],
               [[5.0, 6.0], [7.0, 8.0]],
               [[9.0, 10.0], [11.0, 12.0]]]),
    )
    for key, expected in expected_dense:
        self.assertAllEqual(output_dict[key], expected)

    # The var-len feature comes back as one list per row.
    varlen_out = output_dict['e']
    self.assertAllEqual(varlen_out[0], ['doe', 'a', 'deer'])
    self.assertAllEqual(varlen_out[1], [])
    self.assertAllEqual(varlen_out[2], ['a', 'female', 'deer'])

    # The sparse feature comes back as a pair: per-row indices, then
    # per-row values.
    sparse_out = output_dict['f']
    self.assertEqual(len(sparse_out), 2)
    self.assertAllEqual(sparse_out[0][0], [2, 4, 8])
    self.assertAllEqual(sparse_out[0][1], [8, 4])
    self.assertAllEqual(sparse_out[0][2], [])
    self.assertAllEqual(sparse_out[1][0], [10.0, 20.0, 30.0])
    self.assertAllEqual(sparse_out[1][1], [40.0, 50.0])
    self.assertAllEqual(sparse_out[1][2], [])
def __init__(self, sess, n, filename, jump_prob=0.05, drop_tol=1e-8, verbose=False):
    """
    Set up the LU-decomposition PPR solver and build its TensorFlow ops.

    Args:
        sess (Session): tensorflow session.
        n (int): Number of nodes.
        filename (str): A csv file denoting the graph.
        jump_prob (float): Jumping probability of PPR.
        drop_tol (float): Drops entries with absolute value lower than this
            value when computing inverse of LU. ``None`` selects
            ``n ** -0.5``; ``0`` selects the exact (complete) LU
            factorization instead of the incomplete one.
        verbose (bool): Prints step messages if True.
    """
    self.alias = 'ludc'
    self.verbose = verbose
    self.pp("initializing")

    self.sess = sess
    self.n = n
    self.c = jump_prob

    damping = 1 - self.c
    if drop_tol is None:
        tolerance, exact = np.power(n, -0.5), False
    else:
        tolerance, exact = drop_tol, drop_tol == 0

    self.pp("reading")
    self.node2index, H = read_matrix(filename, d=-damping, add_identity=True)

    self.pp("sorting H")
    self.perm = degree_reverse_rank_perm(H)
    H = reorder_matrix(H, self.perm).tocsc()

    self.pp("computing LU decomposition")
    self.LU = splu(H) if exact else spilu(H, drop_tol=tolerance)

    lower_inv = inv(self.LU.L).tocoo()
    upper_inv = inv(self.LU.U).tocoo()

    self.pp("tf init")
    with tf.variable_scope('ppr_lu_decomposition_tf'):
        t_lower_inv = tf.SparseTensorValue(
            list(zip(lower_inv.row, lower_inv.col)),
            lower_inv.data,
            dense_shape=self.LU.L.shape)
        t_upper_inv = tf.SparseTensorValue(
            list(zip(upper_inv.row, upper_inv.col)),
            upper_inv.data,
            dense_shape=self.LU.U.shape)
        self.t_q = tf.placeholder(tf.float64, shape=[self.n, 1])
        # r = U^-1 * (L^-1 * (c * q))
        scaled_query = self.c * self.t_q
        self.t_r = _sdmm(t_upper_inv, _sdmm(t_lower_inv, scaled_query))
def testSparseExpandAndTile5x(self):
    """Check expand_and_tile with multiple=5 for every insertion dim."""
    # Shape (3,3).
    x = tf.SparseTensorValue(
        indices=((0, 0), (0, 1), (1, 0), (1, 1), (1, 2), (2, 0)),
        values=(1, 2, 3, 4, 5, 6),
        shape=(3, 3))

    with self.test_session():
        # New axis in front: the whole value is repeated 5 times.
        expected_result_dim0 = tf.SparseTensorValue(
            indices=[
                (copy, row, col)
                for copy in range(5)
                for row, col in x.indices],
            values=[v for _ in range(5) for v in x.values],
            shape=(5, 3, 3))
        self._assert_sparse_tensors_equal(
            expected_result_dim0,
            metric_ops.expand_and_tile(x, multiple=5).eval())
        for dim in (-2, 0):
            self._assert_sparse_tensors_equal(
                expected_result_dim0,
                metric_ops.expand_and_tile(x, multiple=5, dim=dim).eval())

        # New axis in the middle: each source row is repeated 5 times.
        expected_result_dim1 = tf.SparseTensorValue(
            indices=[
                (row, copy, col)
                for row in range(3)
                for copy in range(5)
                for src_row, col in x.indices if src_row == row],
            # x.values is a tuple, so `* 5` repeats each row's values.
            values=x.values[0:2] * 5 + x.values[2:5] * 5 + x.values[5:] * 5,
            shape=(3, 5, 3))
        for dim in (-1, 1):
            self._assert_sparse_tensors_equal(
                expected_result_dim1,
                metric_ops.expand_and_tile(x, multiple=5, dim=dim).eval())

        # New axis at the end: each entry is repeated 5 times.
        expected_result_dim2 = tf.SparseTensorValue(
            indices=[
                (row, col, copy)
                for row, col in x.indices
                for copy in range(5)],
            values=[v for v in x.values for _ in range(5)],
            shape=(3, 3, 5))
        self._assert_sparse_tensors_equal(
            expected_result_dim2,
            metric_ops.expand_and_tile(x, multiple=5, dim=2).eval())