ClassifyWav.py 文件源码

python
阅读 32 收藏 0 点赞 0 评论 0

项目:CNNs-Speech-Music-Discrimination 作者: MikeMpapa 项目源码 文件源码
def singleFrame_classify_video(signal, net, transformer, with_smoothing, classNamesCNN):
    """Classify the image stored next to a ``.wav`` file with a Caffe network.

    The image path is derived from ``signal`` by replacing ``".wav"`` with
    ``".png"``. The image is loaded and then deleted from disk before
    classification (presumably it is a temporary spectrogram produced by the
    caller -- verify against the caller).

    Args:
        signal: Path of the audio file; only used to derive the ``.png`` path.
        net: Caffe network exposing a ``data`` input blob, ``forward_all`` and
            a ``probs`` output.
        transformer: Object whose ``preprocess('data', image)`` converts one
            crop into the layout expected by the network input.
        with_smoothing: Unused by this function; kept for interface
            compatibility with callers.
        classNamesCNN: Sequence mapping a class index to its label.

    Returns:
        A tuple ``(output_classes, output_predictions)``: the list of predicted
        labels (one per image) and an array of shape ``(num_images, 2)`` with
        the class scores averaged over the oversampled crops.
    """
    batch_size = 1

    # Load the image, then remove the file: it is not needed once in memory.
    image_path = signal.replace(".wav", ".png")
    input_images = [caffe.io.load_image(image_path)]
    os.remove(image_path)

    # Predictions matrix: one row per image, two columns (two-class model).
    output_predictions = np.zeros((len(input_images), 2))
    output_classes = []

    for i in range(len(input_images)):
        # Take the current batch of images (a single image while batch_size == 1).
        clip_input = input_images[i:min(i + batch_size, len(input_images))]
        # Oversample into 227x227 crops.
        clip_input = caffe.io.oversample(clip_input, [227, 227])

        # Allocate the network input with axes reordered from (N, H, W, C)
        # to (N, C, H, W).
        caffe_in = np.zeros(np.array(clip_input.shape)[[0, 3, 1, 2]], dtype=np.float32)
        for ix, inputs in enumerate(clip_input):
            caffe_in[ix] = transformer.preprocess('data', inputs)

        # Resize the input blob to match this batch, then run the network.
        net.blobs['data'].reshape(*caffe_in.shape)
        out = net.forward_all(data=caffe_in)

        # Average the scores over the 10 crops. Floor division keeps the
        # reshape dimension an integer on Python 3 as well as Python 2
        # (true division would produce a float and make reshape fail).
        # NOTE(review): this (10, n, 2) grouping is only known to be safe while
        # batch_size == 1 -- confirm the crop ordering returned by
        # caffe.io.oversample before raising batch_size.
        crops_per_image = 10
        images_in_batch = caffe_in.shape[0] // crops_per_image
        output_predictions[i:i + batch_size] = np.mean(
            out['probs'].reshape(crops_per_image, images_in_batch, 2), 0)

        # Store the predicted label (no smoothing is applied here).
        iMAX = output_predictions[i:i + batch_size].argmax(axis=1)[0]
        output_classes.append(classNamesCNN[iMAX])

    return output_classes, output_predictions
评论列表
文章目录


问题


面经


文章

微信
公众号

扫码关注公众号