import os
import pickle

import h5py
import numpy as np
import pandas as pd


def processNMostCommon(N=3, wavdirpath=PATH_TRAIN_IN_16KWAVS,
                       xmlpicklepath=PATH_TRAIN_OUT_XMLPICKLEFILE,
                       todirrootpath=PATH_TRAIN_OUT_HDF5):
    global spectrogramWindowLength
    if not os.path.exists(todirrootpath):
        os.makedirs(todirrootpath)
    spectrogramHeight = 200

    # Create resizable HDF5 datasets: the first axis is unlimited (maxshape=None)
    # so each processed batch can be appended below.
    f = h5py.File(os.path.join(todirrootpath, "data_top{}_nozero.hdf5".format(N)), "w")
    dsetX = f.create_dataset('X', (0, 1, spectrogramHeight, spectrogramWindowLength),
                             maxshape=(None, 1, spectrogramHeight, spectrogramWindowLength))
    dsety = f.create_dataset('y', (0, N), maxshape=(None, N))
    dsetMediaId = f.create_dataset('MediaId', (0, 1), maxshape=(None, 1))
    dsetClassId = f.create_dataset('ClassId', (0, 1), maxshape=(None, 1),
                                   dtype=h5py.special_dtype(vlen=str))  # vlen=unicode on Python 2
    df = pd.read_pickle(xmlpicklepath)  # read the metadata
    # To keep only recordings of a given quality, uncomment the next line:
    #df = filterByQuality(df, 0, 3)
    df["OFGS"] = df.apply(mergeOFGS, axis=1)  # merge Order, Family, Genus, Species
    df_mc = getMostCommon(df, N)  # keep only the N most common classes of the dataset
    df = None  # let the GC free up some memory
    print("Metadata loaded")
    # Shuffle rows
    df_mc = df_mc.iloc[np.random.permutation(len(df_mc))]
    df_mc.reset_index(drop=True, inplace=True)

    (lb, binaryLabels) = getOneHotClassId(df_mc)  # generate one-hot labels
    pickle.dump(lb, open(os.path.join(todirrootpath, "labelBinarizer_top{}.pickle".format(N)), 'wb'))

    # Process the selected files of the top N classes in chunks of 30 and save the data into HDF5
    fileRanges = np.hstack((np.arange(0, len(df_mc), 30), len(df_mc)))
    for i in range(len(fileRanges) - 1):
        tempSG = wavsToSpectrogramByList(wavdirpath,
                                         df_mc.FileName[fileRanges[i]:fileRanges[i + 1]],
                                         dontFilter=False)
        # Convert the spectrogram list to a 4D tensor plus labels and ids
        X, y, fn, cIds = spectrogramListToT4(tempSG,
                                             binaryLabels[fileRanges[i]:fileRanges[i + 1]],
                                             filenames=df_mc.MediaId[fileRanges[i]:fileRanges[i + 1]].values,
                                             N=spectrogramWindowLength,
                                             classIds=df_mc.ClassId[fileRanges[i]:fileRanges[i + 1]].values)
        # Grow every dataset along the first axis, then write the new batch
        pre_len = dsetX.shape[0]
        add_len = X.shape[0]
        dsetX.resize(pre_len + add_len, axis=0)
        dsety.resize(pre_len + add_len, axis=0)
        dsetMediaId.resize(pre_len + add_len, axis=0)
        dsetClassId.resize(pre_len + add_len, axis=0)
        dsetX[pre_len:pre_len + add_len, :, :, :] = X
        dsety[pre_len:pre_len + add_len, :] = y
        # Use a name other than the loop variable i in the comprehension: on
        # Python 2 the comprehension variable leaks and would clobber the index.
        dsetMediaId[pre_len:pre_len + add_len, :] = np.transpose([[int(m) for m in fn]])
        dsetClassId[pre_len:pre_len + add_len, :] = np.transpose([[s.encode('utf8') for s in cIds]])
        f.flush()
    f.close()
    return (X, y, fn)  # return the last batch for debugging purposes
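
# A minimal, hypothetical read-back sketch (not part of the original pipeline):
# it assumes processNMostCommon() has already run with its default arguments,
# and that PATH_TRAIN_OUT_HDF5 still points at the directory used above. The
# dataset names ('X', 'y', 'MediaId', 'ClassId') match those created in the
# function.
def loadProcessedSamples(N=3, todirrootpath=PATH_TRAIN_OUT_HDF5, count=10):
    with h5py.File(os.path.join(todirrootpath, "data_top{}_nozero.hdf5".format(N)), "r") as fin:
        X = fin['X'][:count]   # (count, 1, spectrogramHeight, spectrogramWindowLength)
        y = fin['y'][:count]   # one-hot labels, shape (count, N)
        mediaIds = fin['MediaId'][:count, 0].astype(int)
        # Depending on the h5py version, vlen strings come back as str or bytes
        classIds = [s.decode('utf8') if isinstance(s, bytes) else s
                    for s in fin['ClassId'][:count, 0]]
    return X, y, mediaIds, classIds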