# librispeech.py -- Python source code snippet

def ingest_librispeech(input_directory, manifest_file=None, absolute_paths=True):
    """ Finds all .txt files and their indicated .flac files and writes them to an Aeon
    compatible manifest file.

    Each non-blank line of a transcript file is expected to look like
    "<utterance-id> <transcript>". For every such line one tab-separated
    record "<directory>/<utterance-id>.flac<TAB><transcript>" is written,
    where <directory> is the directory holding the transcript file. The
    existence of the .flac files themselves is not checked.

    Arguments:
        input_directory (str): Path to librispeech directory
        manifest_file (str): Path to manifest file to output. When None, the
                             manifest is written to "manifest.tsv" inside
                             input_directory.
        absolute_paths (bool): Whether audio file paths should be absolute or
                               relative to input_directory.

    Raises:
        IOError: If input_directory is not a directory or contains no .txt
                 files.
        ValueError: If a non-blank transcript line has no transcript text
                    after the utterance id.
    """

    if not os.path.isdir(input_directory):
        raise IOError("Data directory does not exist! {}".format(input_directory))

    if manifest_file is None:
        # Use a non-.txt extension so a later run does not pick the manifest
        # up as if it were a transcript file.
        manifest_file = os.path.join(input_directory, "manifest.tsv")

    transcript_files = get_files(input_directory, pattern="*.txt")
    if not transcript_files:
        raise IOError("No .txt files were found in {}".format(input_directory))

    tqdm.write("Preparing manifest file...")
    with open(manifest_file, "w") as manifest:
        manifest.write("@FILE\tSTRING\n")
        for tfile in tqdm(transcript_files, unit=" Files", mininterval=.001):
            directory = os.path.dirname(tfile)
            if absolute_paths:
                # get_files may return paths relative to the working directory
                # when input_directory itself is relative.
                directory = os.path.abspath(directory)
            else:
                directory = os.path.relpath(directory, input_directory)

            with open(tfile, "r") as fid:
                for line_number, line in enumerate(fid, 1):
                    # Drop the trailing newline so that each record occupies
                    # exactly one line of the manifest.
                    line = line.strip()
                    if not line:
                        continue

                    fields = line.split(" ", 1)
                    if len(fields) != 2:
                        raise ValueError(
                            "Malformed transcript line {} in {}: {!r}".format(
                                line_number, tfile, line))

                    id_, transcript = fields
                    afile = "{}.flac".format(os.path.join(directory, id_))
                    manifest.write("{}\t{}\n".format(afile, transcript))