data_converter.py 文件源码

python
阅读 35 收藏 0 点赞 0 评论 0

项目:AutoML5 作者: djajetic 项目源码 文件源码
def tp_filter(X, Y, feat_num=1000, verbose=True):
    """Select the features with the highest true-positive counts.

    TP feature selection in the spirit of the winners of the KDD cup 2001.
    Only for binary classification and sparse matrices.

    The filter is applied only when all of the following hold:
    ``X`` is a scipy sparse matrix, ``Y`` is one-dimensional, ``Y`` takes
    exactly two distinct values, and the mean of ``Y`` is below 0.1.
    In every other case no selection is made and all column indices are
    returned.

    A column's score is the number of rows with ``Y > 0`` in which the
    column has a "hit".  When every stored non-zero value of ``X`` is
    identical (sparse binary data) a hit is any non-zero entry; otherwise
    a hit is a strictly positive entry.

    ``X`` is never modified.

    Args:
        X: Feature matrix of shape (n_samples, n_features).
        Y: Label array; only its ``shape``, values and ``Y > 0`` are used.
        feat_num: Maximum number of column indices to return.
        verbose: If true, print a progress message when filtering runs.

    Returns:
        A list of column indices.  When filtering runs, indices are ordered
        by decreasing score (ties keep ascending column order) and truncated
        to ``feat_num``; otherwise the list is ``0 .. n_features - 1``.
    """
    # NOTE(review): the 0.1 threshold is on the mean of Y, which equals the
    # positive-class fraction only for 0/1 labels -- confirm label encoding
    # with callers.  float() keeps this a true division on Python 2 as well.
    filterable = (
        issparse(X)
        and len(Y.shape) == 1
        and len(set(Y)) == 2
        and Y.sum() / float(Y.shape[0]) < 0.1
    )
    if not filterable:
        return list(range(X.shape[1]))

    if verbose:
        print("========= Filtering features...")

    # CSR supports boolean row selection; tocsr() does not copy CSR input.
    X_csr = X.tocsr()
    positive_rows = X_csr[Y > 0, :]

    # Decide "sparse binary" from the stored values directly instead of
    # fancy-indexing X, and without rewriting the caller's data to 1.
    stored = X_csr.data
    nonzero_values = stored[stored != 0]
    # An all-zero matrix has no values to compare; every score is 0 anyway.
    is_binary = (
        nonzero_values.size == 0
        or nonzero_values.min() == nonzero_values.max()
    )

    hits = (positive_rows != 0) if is_binary else (positive_rows > 0)
    tp = np.asarray(hits.sum(axis=0)).ravel().astype(np.int64)

    # Stable sort on the negated scores: descending score, and equal scores
    # stay in ascending column order (same tie order as a stable
    # sorted(..., reverse=True)).
    ranking = np.argsort(-tp, kind="mergesort")
    return ranking[:feat_num].tolist()
评论列表
文章目录


问题


面经


文章

微信
公众号

扫码关注公众号