discretize.py 文件源码

python
阅读 38 收藏 0 点赞 0 评论 0

项目:ModelFlow 作者: yuezPrincetechs 项目源码 文件源码
def transform(self,X):
    """
    Replace each value with the index of the bin it falls into.

    Bin indices come from ``np.searchsorted`` against the fitted cut points
    in ``self.cuts``; NaN entries are replaced by ``self.fill_na``. The
    input is copied first, so ``X`` itself is not modified.

    Parameters
    ----------
    X : numpy.ndarray, pandas.Series or pandas.DataFrame
        1-D input is binned with ``self.cuts`` used as one sequence of cut
        points. For 2-D input ``self.cuts`` is iterated as a mapping
        ``feature -> cut points``; ``feature`` is used as a column label
        for a DataFrame and as a column position (``data[:, feature]``)
        otherwise. Columns without an entry in ``self.cuts`` are left as is.

    Returns
    -------
    numpy.ndarray, pandas.Series or pandas.DataFrame
        Same container kind as ``X``, unless ``self.return_array`` is truthy,
        in which case pandas results are returned as their ``.values``.
        When ``self.return_numeric`` is falsy and ``X`` is a pandas object,
        bin indices are translated through the dict(s) returned by
        ``self.get_label()``; indices without a label become
        ``self.fill_na``.

    Raises
    ------
    Exception
        If ``X`` is a numpy array and ``self.fill_na`` is a string.
    """
    data = X.copy()
    is_array = isinstance(data, np.ndarray)
    if is_array:
        # NOTE(review): the two message strings below look like text lost to
        # an encoding error; they are kept verbatim -- restore from upstream.
        if isinstance(self.fill_na, str):
            raise Exception('numpy?????????????')
        if not self.return_numeric:
            warnings.warn('numpy????????????????????????dataframe?series?')

    # Labels are only ever applied to pandas input, so only fetch them then.
    use_labels = (not self.return_numeric) and (not is_array)
    if use_labels:
        newlabel = self.get_label()

    if len(data.shape) == 1:
        missing = np.isnan(data)
        bins = np.searchsorted(self.cuts, data).astype(int)
        if is_array:
            # Fill in one step, as the 2-D array branch does. Writing
            # fill_na into the integer bin array afterwards would cast it
            # (e.g. -0.5 -> 0, colliding with bin 0) or fail for NaN.
            data = np.where(missing, self.fill_na, bins)
        else:
            # searchsorted never yields -1, so it is a safe "missing" marker.
            result = np.where(missing, -1, bins)
            if use_labels:
                to_label = np.frompyfunc(
                    lambda xx: newlabel.get(xx, self.fill_na), 1, 1)
                result = to_label(result)
            result = pd.Series(result, index=data.index, name=data.name)
            result[result == -1] = self.fill_na
            data = result
    else:
        for feature in self.cuts:
            cut_points = self.cuts[feature]
            if not isinstance(data, pd.DataFrame):
                column = data[:, feature]
                bins = np.searchsorted(cut_points, column).astype(int)
                data[:, feature] = np.where(
                    np.isnan(column), self.fill_na, bins)
            else:
                column = data[feature]
                bins = np.searchsorted(cut_points, column).astype(int)
                data[feature] = np.where(np.isnan(column), -1, bins)
                if not self.return_numeric:
                    feature_labels = newlabel[feature]
                    to_label = np.frompyfunc(
                        lambda xx: feature_labels.get(xx, self.fill_na), 1, 1)
                    data[feature] = to_label(data[feature])
                else:
                    data.loc[data[feature] == -1, feature] = self.fill_na

    if self.return_array and isinstance(data, (pd.Series, pd.DataFrame)):
        return data.values
    return data
评论列表
文章目录


问题


面经


文章

微信
公众号

扫码关注公众号