def _process_video(filename, coder):
    """Process a single video file: decode it and encode frames and masks.

    Args:
        filename: path to the video file. The annotation file is located
            from the last two '/'-separated components of this path
            (parent directory name and file name up to the first '.').
        coder: instance of ImageCoder; must provide decode_video,
            encode_frame and encode_mask.

    Returns:
        encoded_frames_seq: list with one coder.encode_frame result per frame.
        encoded_masks_seq: list with one coder.encode_mask result per frame.
        frame_h: integer, height of the decoded frames (video.shape[1]).
        frame_w: integer, width of the decoded frames (video.shape[2]).
        seq_length: sequence length converted to a native Python scalar.
    """
    video, raw_h, raw_w, seq_length = coder.decode_video(filename)
    video = video.astype(np.uint8)
    # Sanity checks: the indexing below relies on a 4-D array whose last
    # axis has exactly three entries.
    assert len(video.shape) == 4
    assert video.shape[3] == 3
    frame_h, frame_w = video.shape[1], video.shape[2]

    # Generate the mask from the annotations. The annotation file lives at
    # <annotation_directory>/<video parent dir>/<video base name>.xgtf
    groups = filename.split('/')
    annot_file_name = groups[-1].split('.')[0] + '.xgtf'
    annot_file_path = os.path.join(
        FLAGS.annotation_directory, groups[-2], annot_file_name)
    parsed_bbx = _parse_annotation_xml(annot_file_path)
    if FLAGS.resize_h != -1:
        # Boxes are only rescaled when a resize height is configured.
        parsed_bbx = _resize_bbx(parsed_bbx, raw_h, raw_w)
    # NOTE(review): the mask is built with FLAGS.resize_h / FLAGS.resize_w
    # even when resizing is disabled (-1) -- confirm that _bbx_to_mask
    # handles that case.
    masks = _bbx_to_mask(parsed_bbx, seq_length, FLAGS.resize_h, FLAGS.resize_w)

    encoded_frames_seq = []
    encoded_masks_seq = []
    # Frame and mask are encoded in lock-step to keep the original call order.
    for idx in range(seq_length):
        encoded_frames_seq.append(coder.encode_frame(video[idx, :, :, :]))
        encoded_masks_seq.append(coder.encode_mask(masks[idx, :, :, :]))

    # np.asscalar was deprecated in NumPy 1.16 and removed in 1.23;
    # ndarray.item() is the documented replacement. np.asarray also lets a
    # plain Python int pass through unchanged.
    seq_length_scalar = np.asarray(seq_length).item()
    return encoded_frames_seq, encoded_masks_seq, frame_h, frame_w, seq_length_scalar
generate_tfrecord.py 文件源码
python
阅读 27
收藏 0
点赞 0
评论 0
评论列表
文章目录