# Requires: librosa, numpy. The helpers referenced below (maybe_download, get_speakers,
# one_hot_from_item, dense_to_one_hot, one_hot_word, string_to_int_word, speaker,
# Source, Target, DATA_DIR, path, max_word_length) are defined elsewhere in this module.
import os
from random import shuffle

import librosa
import numpy as np


def mfcc_batch_generator(batch_size=10, source=Source.DIGIT_WAVES, target=Target.digits):
    maybe_download(source, DATA_DIR)
    if target == Target.speaker:
        speakers = get_speakers()
    batch_features = []
    labels = []
    files = os.listdir(path)  # `path` is the module-level directory holding the downloaded .wav files
    while True:
        print("loaded batch of %d files" % len(files))
        shuffle(files)
        for file in files:
            if not file.endswith(".wav"):
                continue
            # Load the clip and compute its MFCC matrix (librosa defaults to n_mfcc=20).
            wave, sr = librosa.load(path + file, mono=True)
            mfcc = librosa.feature.mfcc(y=wave, sr=sr)  # keyword arguments required by recent librosa
            # Derive the label from the file name, depending on the training target.
            if target == Target.speaker:
                label = one_hot_from_item(speaker(file), speakers)
            elif target == Target.digits:
                label = dense_to_one_hot(int(file[0]), 10)  # file name starts with the spoken digit
            elif target == Target.first_letter:
                label = dense_to_one_hot((ord(file[0]) - 48) % 32, 32)
            elif target == Target.hotword:
                label = one_hot_word(file, pad_to=max_word_length)
            elif target == Target.word:
                label = string_to_int_word(file, pad_to=max_word_length)
                # label = sparse_labels(file, pad_to=20)  # max_output_length
            else:
                raise Exception("todo : labels for Target!")
            labels.append(label)
            # Zero-pad the time axis to 80 frames so every sample has the same shape
            # (assumes no clip is longer than 80 MFCC frames).
            mfcc = np.pad(mfcc, ((0, 0), (0, 80 - len(mfcc[0]))), mode='constant', constant_values=0)
            batch_features.append(np.array(mfcc))
            if len(batch_features) >= batch_size:
                # if target == Target.word: labels = sparse_labels(labels)
                # np.array(labels).shape comes out as (64,) instead of (64, 15, 32)
                # unless every label has the same length (dim_1 == const).
                yield batch_features, labels  # basic_rnn_seq2seq inputs must be a sequence
                batch_features = []  # reset for the next batch
                labels = []

# If you set dynamic_pad=True when calling tf.train.batch, the returned batch is
# automatically padded with 0s. Handy! A lower-level option is tf.PaddingFIFOQueue.
# Only apply this to a subset of all samples at one time.
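# A minimal usage sketch (an assumption, not part of the original listing): pull one
# batch from the generator and convert it to arrays. With librosa's default n_mfcc=20
# and padding to 80 frames, each feature matrix has shape (20, 80); digit labels are
# 10-way one-hot vectors. The batch size of 64 here is arbitrary.
if __name__ == "__main__":
    batch = mfcc_batch_generator(batch_size=64, target=Target.digits)
    X, Y = next(batch)                        # one batch of padded MFCC features and one-hot labels
    trainX, trainY = np.array(X), np.array(Y)
    print(trainX.shape, trainY.shape)         # expected: (64, 20, 80) (64, 10)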