import os
import pickle

import h5py
import numpy as np
import pandas as pd


def processNMostCommon(N=3, wavdirpath=PATH_TRAIN_IN_16KWAVS,
                       xmlpicklepath=PATH_TRAIN_OUT_XMLPICKLEFILE,
                       todirrootpath=PATH_TRAIN_OUT_HDF5):
    global spectrogramWindowLength
    if not os.path.exists(todirrootpath):
        os.makedirs(todirrootpath)
    spectrogramHeight = 200

    # Create resizable HDF5 datasets: the first axis is unlimited (maxshape=None)
    # so each processed batch can be appended below.
    f = h5py.File(os.path.join(todirrootpath, "data_top{}_nozero.hdf5".format(N)), "w")
    dsetX = f.create_dataset('X', (0, 1, spectrogramHeight, spectrogramWindowLength),
                             maxshape=(None, 1, spectrogramHeight, spectrogramWindowLength))
    dsety = f.create_dataset('y', (0, N), maxshape=(None, N))
    dsetMediaId = f.create_dataset('MediaId', (0, 1), maxshape=(None, 1))
    dsetClassId = f.create_dataset('ClassId', (0, 1), maxshape=(None, 1),
                                   dtype=h5py.special_dtype(vlen=str))  # vlen=unicode on Python 2
    df = pd.read_pickle(xmlpicklepath)  # read the metadata
    # To keep only recordings of a given quality, uncomment the next line:
    #df = filterByQuality(df, 0, 3)
    df["OFGS"] = df.apply(mergeOFGS, axis=1)  # merge Order, Family, Genus, Species
    df_mc = getMostCommon(df, N)  # keep only the N most common classes of the dataset
    df = None  # let the GC free up some memory
    print("Metadata loaded")
    # Shuffle rows
    df_mc = df_mc.iloc[np.random.permutation(len(df_mc))]
    df_mc.reset_index(drop=True, inplace=True)

    (lb, binaryLabels) = getOneHotClassId(df_mc)  # generate one-hot labels
    pickle.dump(lb, open(os.path.join(todirrootpath, "labelBinarizer_top{}.pickle".format(N)), 'wb'))

    # Process the selected files of the top N classes in chunks of 30 and save the data into HDF5
    fileRanges = np.hstack((np.arange(0, len(df_mc), 30), len(df_mc)))
    for i in range(len(fileRanges) - 1):
        tempSG = wavsToSpectrogramByList(wavdirpath,
                                         df_mc.FileName[fileRanges[i]:fileRanges[i + 1]],
                                         dontFilter=False)
        # Convert the spectrogram list to a 4D tensor plus labels and ids
        X, y, fn, cIds = spectrogramListToT4(tempSG,
                                             binaryLabels[fileRanges[i]:fileRanges[i + 1]],
                                             filenames=df_mc.MediaId[fileRanges[i]:fileRanges[i + 1]].values,
                                             N=spectrogramWindowLength,
                                             classIds=df_mc.ClassId[fileRanges[i]:fileRanges[i + 1]].values)
        # Grow every dataset along the first axis, then write the new batch
        pre_len = dsetX.shape[0]
        add_len = X.shape[0]
        dsetX.resize(pre_len + add_len, axis=0)
        dsety.resize(pre_len + add_len, axis=0)
        dsetMediaId.resize(pre_len + add_len, axis=0)
        dsetClassId.resize(pre_len + add_len, axis=0)
        dsetX[pre_len:pre_len + add_len, :, :, :] = X
        dsety[pre_len:pre_len + add_len, :] = y
        # Use a name other than the loop variable i in the comprehension: on
        # Python 2 the comprehension variable leaks and would clobber the index.
        dsetMediaId[pre_len:pre_len + add_len, :] = np.transpose([[int(m) for m in fn]])
        dsetClassId[pre_len:pre_len + add_len, :] = np.transpose([[s.encode('utf8') for s in cIds]])
        f.flush()
    f.close()
    return (X, y, fn)  # return the last batch for debugging purposes
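
# A minimal, hypothetical read-back sketch (not part of the original pipeline):
# it assumes processNMostCommon() has already run with its default arguments,
# and that PATH_TRAIN_OUT_HDF5 still points at the directory used above. The
# dataset names ('X', 'y', 'MediaId', 'ClassId') match those created in the
# function.
def loadProcessedSamples(N=3, todirrootpath=PATH_TRAIN_OUT_HDF5, count=10):
    with h5py.File(os.path.join(todirrootpath, "data_top{}_nozero.hdf5".format(N)), "r") as fin:
        X = fin['X'][:count]   # (count, 1, spectrogramHeight, spectrogramWindowLength)
        y = fin['y'][:count]   # one-hot labels, shape (count, N)
        mediaIds = fin['MediaId'][:count, 0].astype(int)
        # Depending on the h5py version, vlen strings come back as str or bytes
        classIds = [s.decode('utf8') if isinstance(s, bytes) else s
                    for s in fin['ClassId'][:count, 0]]
    return X, y, mediaIds, classIds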