# Requires: librosa, numpy. The helpers referenced below (maybe_download, get_speakers,
# one_hot_from_item, dense_to_one_hot, one_hot_word, string_to_int_word, speaker,
# Source, Target, DATA_DIR, path, max_word_length) are defined elsewhere in this module.
import os
from random import shuffle

import librosa
import numpy as np


def mfcc_batch_generator(batch_size=10, source=Source.DIGIT_WAVES, target=Target.digits):
    maybe_download(source, DATA_DIR)
    if target == Target.speaker:
        speakers = get_speakers()
    batch_features = []
    labels = []
    files = os.listdir(path)  # `path` is the module-level directory holding the downloaded .wav files
    while True:
        print("loaded batch of %d files" % len(files))
        shuffle(files)
        for file in files:
            if not file.endswith(".wav"):
                continue
            # Load the clip and compute its MFCC matrix (librosa defaults to n_mfcc=20).
            wave, sr = librosa.load(path + file, mono=True)
            mfcc = librosa.feature.mfcc(y=wave, sr=sr)  # keyword arguments required by recent librosa
            # Derive the label from the file name, depending on the training target.
            if target == Target.speaker:
                label = one_hot_from_item(speaker(file), speakers)
            elif target == Target.digits:
                label = dense_to_one_hot(int(file[0]), 10)  # file name starts with the spoken digit
            elif target == Target.first_letter:
                label = dense_to_one_hot((ord(file[0]) - 48) % 32, 32)
            elif target == Target.hotword:
                label = one_hot_word(file, pad_to=max_word_length)
            elif target == Target.word:
                label = string_to_int_word(file, pad_to=max_word_length)
                # label = sparse_labels(file, pad_to=20)  # max_output_length
            else:
                raise Exception("todo : labels for Target!")
            labels.append(label)
            # Zero-pad the time axis to 80 frames so every sample has the same shape
            # (assumes no clip is longer than 80 MFCC frames).
            mfcc = np.pad(mfcc, ((0, 0), (0, 80 - len(mfcc[0]))), mode='constant', constant_values=0)
            batch_features.append(np.array(mfcc))
            if len(batch_features) >= batch_size:
                # if target == Target.word: labels = sparse_labels(labels)
                # np.array(labels).shape comes out as (64,) instead of (64, 15, 32)
                # unless every label has the same length (dim_1 == const).
                yield batch_features, labels  # basic_rnn_seq2seq inputs must be a sequence
                batch_features = []  # reset for the next batch
                labels = []

# If you set dynamic_pad=True when calling tf.train.batch, the returned batch is
# automatically padded with 0s. Handy! A lower-level option is tf.PaddingFIFOQueue.
# Only apply this to a subset of all samples at one time.
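# A minimal usage sketch (an assumption, not part of the original listing): pull one
# batch from the generator and convert it to arrays. With librosa's default n_mfcc=20
# and padding to 80 frames, each feature matrix has shape (20, 80); digit labels are
# 10-way one-hot vectors. The batch size of 64 here is arbitrary.
if __name__ == "__main__":
    batch = mfcc_batch_generator(batch_size=64, target=Target.digits)
    X, Y = next(batch)                        # one batch of padded MFCC features and one-hot labels
    trainX, trainY = np.array(X), np.array(Y)
    print(trainX.shape, trainY.shape)         # expected: (64, 20, 80) (64, 10)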