# build_mtt.py -- Python source code snippet

def _process_dataset(anno, sample_rate, n_samples, n_threads):
  """Processes, and saves MagnaTagATune dataset using multiple threads.

  One work item is queued for every distinct (split, shard) pair found in
  `anno`. Each item is the tuple
  `(assigned_anno, sample_rate, n_samples, split, shard, n_shards)`, where
  `assigned_anno` holds the rows of `anno` belonging to that pair and
  `n_shards` is the number of distinct shards within that split. The queue is
  then handed to `_process_audio_files`, either directly or from `n_threads`
  worker threads.

  Args:
    anno: Annotation DataFrame contains tags, mp3_path, split, and shard.
    sample_rate: Sampling rate of the audios. If the sampling rate is different
      with an audio's original sampling rate, then it re-samples the audio.
    n_samples: Number of samples one segment contains.
    n_threads: Number of threads to process the dataset. Values of 1 or less
      run the work sequentially in the calling thread.
  """
  args_queue = Queue()

  # Distinct (split, shard) pairs in order of first appearance. Going through
  # drop_duplicates avoids calling pd.unique on a plain list of tuples, which
  # newer pandas versions deprecate.
  split_and_shard_sets = (
      anno[['split', 'shard']].drop_duplicates().itertuples(index=False,
                                                            name=None))

  # The shard count depends only on the split, so compute it once per split
  # rather than once per (split, shard) pair.
  n_shards_by_split = {}

  for split, shard in split_and_shard_sets:
    in_split = anno['split'] == split
    if split not in n_shards_by_split:
      n_shards_by_split[split] = anno.loc[in_split, 'shard'].nunique()
    n_shards = n_shards_by_split[split]

    assigned_anno = anno[in_split & (anno['shard'] == shard)]

    args = (assigned_anno, sample_rate, n_samples, split, shard, n_shards)
    args_queue.put(args)

  # Honor the n_threads argument instead of reading the global flag, so the
  # caller's value is what actually controls the degree of concurrency.
  if n_threads > 1:
    threads = []
    for _ in range(n_threads):
      thread = Thread(target=_process_audio_files, args=[args_queue])
      thread.start()
      threads.append(thread)

    # Wait until every worker has finished before returning.
    for thread in threads:
      thread.join()
  else:
    _process_audio_files(args_queue)