# generate_tfrecord.py source file - Python code snippet

def _process_video(filename, coder):
    """
    Decode one video, build its per-frame activity masks, and encode both.

    The annotation file is looked up as
    ``<FLAGS.annotation_directory>/<parent dir of video>/<video stem>.xgtf``,
    where the stem is the video's base name up to its first ``.``.

    Args:
        filename: '/'-separated path to the video file; it must contain at
            least one parent directory component.
        coder: instance of ImageCoder providing ``decode_video``,
            ``encode_frame`` and ``encode_mask``.
    Returns:
        encoded_frames_seq: list with one ``coder.encode_frame`` result per frame.
        encoded_masks_seq: list with one ``coder.encode_mask`` result per frame.
        frame_h: integer, height of the decoded frames in pixels.
        frame_w: integer, width of the decoded frames in pixels.
        seq_length: sequence length (non-zero frames) as a plain Python scalar.
    Raises:
        ValueError: if the decoded video is not a 4-D array with 3 channels.
    """

    video, raw_h, raw_w, seq_length = coder.decode_video(filename)
    video = video.astype(np.uint8)
    # Explicit check instead of `assert`, which is stripped under `python -O`.
    if video.ndim != 4 or video.shape[3] != 3:
        raise ValueError(
            'Expected a 4-D video array with 3 channels, got shape %s for %s'
            % (video.shape, filename))
    frame_h, frame_w = video.shape[1], video.shape[2]

    # Locate the annotation file that belongs to this video.
    groups = filename.split('/')
    annot_file_name = groups[-1].split('.')[0] + '.xgtf'
    annot_file_path = os.path.join(FLAGS.annotation_directory, groups[-2], annot_file_name)

    # Generate masks from the annotated bounding boxes. The boxes are rescaled
    # from the raw video size only when resizing is enabled (resize_h != -1).
    parsed_bbx = _parse_annotation_xml(annot_file_path)
    if FLAGS.resize_h != -1:
        parsed_bbx = _resize_bbx(parsed_bbx, raw_h, raw_w)
    # NOTE(review): when FLAGS.resize_h == -1 the mask size passed here is
    # also -1; confirm that _bbx_to_mask handles that case.
    masks = _bbx_to_mask(parsed_bbx, seq_length, FLAGS.resize_h, FLAGS.resize_w)

    # np.asscalar was removed in NumPy 1.23; .item() is the documented
    # replacement and yields a plain Python scalar.
    num_frames = np.asarray(seq_length).item()

    encoded_frames_seq = []
    encoded_masks_seq = []
    for idx in range(num_frames):
        encoded_frames_seq.append(coder.encode_frame(video[idx]))
        encoded_masks_seq.append(coder.encode_mask(masks[idx, :, :, :]))

    return encoded_frames_seq, encoded_masks_seq, frame_h, frame_w, num_frames