def read_and_decode(filename_queue):
    """Read one serialized example from a TFRecord file and decode it.

    Args:
        filename_queue: the queue passed straight to
            ``tf.TFRecordReader.read``.

    Returns:
        A 6-tuple ``(target, unlabeled, un_len, labeled, la_len, label)``:

        * ``target``: the raw ``'target'`` bytes decoded as float32 and
          reshaped to ``[FLAGS.html_len, FLAGS.we_dim]``.
        * ``unlabeled``: the ``'unlabeled'`` feature exactly as parsed
          from its ``tf.VarLenFeature(tf.float32)`` spec.
        * ``un_len``: the ``'un_len'`` feature cast to int32.
        * ``labeled``: the ``'labeled'`` feature exactly as parsed from
          its ``tf.VarLenFeature(tf.float32)`` spec.
        * ``la_len``: the ``'la_len'`` feature cast to int32.
        * ``label``: the ``'label'`` feature cast to int32.
    """
    record_reader = tf.TFRecordReader()
    _, record = record_reader.read(filename_queue)

    # No default values are supplied for the fixed-length entries, so each
    # of those keys has to be present in every record.
    feature_spec = {
        'label': tf.FixedLenFeature([], tf.int64),
        'target': tf.FixedLenFeature([], tf.string),
        'un_len': tf.FixedLenFeature([], tf.int64),
        'unlabeled': tf.VarLenFeature(tf.float32),
        'la_len': tf.FixedLenFeature([], tf.int64),
        'labeled': tf.VarLenFeature(tf.float32),
    }
    parsed = tf.parse_single_example(record, features=feature_spec)

    # The dtype given to decode_raw must be the same one that was used when
    # the bytes were written by the conversion step.
    flat_target = tf.decode_raw(parsed['target'], tf.float32)
    # Pin the static length of the flat vector, then give it its 2-D layout.
    # NOTE(review): the original author was unsure whether both set_shape
    # and reshape are required here -- confirm before dropping either.
    flat_target.set_shape([FLAGS.html_len * FLAGS.we_dim])
    target = tf.reshape(flat_target, [FLAGS.html_len, FLAGS.we_dim])

    # The variable-length features are handed back untouched; any
    # densifying, reshaping or padding to FLAGS.max_relatives is left to
    # the caller.
    unlabeled_len = tf.cast(parsed['un_len'], tf.int32)
    unlabeled_rel = parsed['unlabeled']

    labeled_len = tf.cast(parsed['la_len'], tf.int32)
    labeled_rel = parsed['labeled']

    class_label = tf.cast(parsed['label'], tf.int32)

    return (target, unlabeled_rel, unlabeled_len,
            labeled_rel, labeled_len, class_label)
# (Web-page residue, not part of the program: "Comment list" / "Table of contents".)