def singleFrame_classify_video(signal, net, transformer, with_smoothing, classNamesCNN):
    """Classify the spectrogram image that accompanies an audio file.

    The image path is obtained by replacing ".wav" with ".png" in ``signal``.
    The image is loaded, deleted from disk, oversampled into crops, run
    through ``net``, and the per-crop probabilities are averaged.

    Args:
        signal: Path string of the audio file; only used to derive the
            ".png" path of the spectrogram image.
        net: Network object exposing ``blobs['data'].reshape(...)`` and
            ``forward_all(data=...)`` whose result has a ``'probs'`` entry.
        transformer: Object whose ``preprocess('data', image)`` returns an
            array that fits one row of the network input.
        with_smoothing: Not used by this function; kept so existing callers
            keep working.
        classNamesCNN: Sequence of class labels, indexed by the position of
            the highest averaged probability.

    Returns:
        A tuple ``(output_classes, output_predictions)``: the list of
        predicted labels (one per image) and a ``(num_images, 2)`` array of
        averaged class probabilities.
    """
    batch_size = 1
    # caffe.io.oversample is documented to produce 10 crops per input image
    # (four corners + centre, each also mirrored); the reshape below relies
    # on that.
    crops_per_image = 10
    num_classes = 2

    # Load the spectrogram, then delete it from disk; the pixel data is
    # already in memory by the time the file is removed.
    image_path = signal.replace(".wav", ".png")
    input_images = [caffe.io.load_image(image_path)]
    os.remove(image_path)

    # One row of class probabilities per input image.
    output_predictions = np.zeros((len(input_images), num_classes))
    output_classes = []

    for i in range(len(input_images)):
        # Take the current batch (a single image, since batch_size == 1).
        clip_input = input_images[i:min(i + batch_size, len(input_images))]
        # Crop to 227x227 patches.
        clip_input = caffe.io.oversample(clip_input, [227, 227])

        # Allocate the network input with axes reordered from
        # (N, H, W, K) to (N, K, H, W).
        caffe_in = np.zeros(np.array(clip_input.shape)[[0, 3, 1, 2]],
                            dtype=np.float32)
        for ix, inputs in enumerate(clip_input):
            caffe_in[ix] = transformer.preprocess('data', inputs)

        # Resize the input blob to match this batch before the forward pass.
        net.blobs['data'].reshape(*caffe_in.shape)
        out = net.forward_all(data=caffe_in)

        # Average the probabilities over the crops of each image.
        # Floor division keeps the dimension an int; "/" would yield a float
        # on Python 3 and make reshape raise TypeError.
        images_in_batch = caffe_in.shape[0] // crops_per_image
        crop_probs = out['probs'].reshape(
            crops_per_image, images_in_batch, num_classes)
        output_predictions[i:i + batch_size] = np.mean(crop_probs, 0)

        # Store the predicted label (no smoothing is applied).
        iMAX = output_predictions[i:i + batch_size].argmax(axis=1)[0]
        output_classes.append(classNamesCNN[iMAX])

    return output_classes, output_predictions
ClassifyWav.py 文件源码
python
阅读 32
收藏 0
点赞 0
评论 0
评论列表
文章目录