def transform(self, X):
    """
    Replace each value with the index of the bin it falls into.

    Bin indexes are computed with ``np.searchsorted`` (default ``side='left'``)
    against ``self.cuts``, so they range from 0 to ``len(cuts)``.  NaN inputs
    are reported as ``self.fill_na``.  When ``self.return_numeric`` is false,
    pandas inputs are mapped from bin index to the label returned by
    ``self.get_label()`` (NaN inputs are looked up as index -1 and fall back
    to ``self.fill_na``); numpy inputs always stay numeric and only trigger a
    warning.

    X: 1-D data (Series or 1-D ndarray), binned with ``self.cuts`` used as a
       single sequence of cut points, or 2-D data (DataFrame or 2-D ndarray),
       where ``self.cuts`` maps a column key to that column's cut points and
       only those columns are converted.
    Returns a transformed copy of X (X itself is left untouched).  A pandas
    result is returned as its ``.values`` array when ``self.return_array`` is
    true.
    Raises Exception when X is an ndarray and ``self.fill_na`` is a string.
    """
    data = X.copy()
    is_ndarray = isinstance(data, np.ndarray)
    if is_ndarray:
        # NOTE(review): both messages below reached this file already garbled;
        # they are kept byte-for-byte because the original text is unknown.
        if isinstance(self.fill_na, str):
            raise Exception('numpy?????????????')
        if not self.return_numeric:
            warnings.warn('numpy????????????????????????dataframe?series?')
    if not self.return_numeric:
        newlabel = self.get_label()

    if len(data.shape) == 1:
        bins = np.searchsorted(self.cuts, data).astype(int)
        missing = np.asarray(np.isnan(data))
        if is_ndarray:
            # Let np.where choose a dtype wide enough for fill_na.  Writing
            # fill_na into the int array afterwards truncated float fill
            # values (-0.5 became 0, i.e. a real bin) and raised on NaN.
            data = np.where(missing, self.fill_na, bins)
        elif self.return_numeric:
            # Non-inplace where() upcasts when fill_na needs it, instead of
            # relying on masked assignment into an int Series.
            numeric = pd.Series(bins, index=data.index, name=data.name)
            data = numeric.where(~missing, self.fill_na)
        else:
            to_label = np.frompyfunc(
                lambda code: newlabel.get(code, self.fill_na), 1, 1)
            labelled = pd.Series(to_label(np.where(missing, -1, bins)),
                                 index=data.index, name=data.name)
            # A label that is literally -1 is still treated as "missing".
            leftover = labelled == -1
            if leftover.any():
                labelled[leftover] = self.fill_na
            data = labelled
    else:
        is_frame = isinstance(data, pd.DataFrame)
        for feature in self.cuts:
            if not is_frame:
                # ndarray: `feature` is used as a column position.
                column = data[:, feature]
                bins = np.searchsorted(self.cuts[feature], column).astype(int)
                data[:, feature] = np.where(np.isnan(column), self.fill_na, bins)
                continue

            column = data[feature]
            bins = np.searchsorted(self.cuts[feature], column).astype(int)
            missing = np.asarray(np.isnan(column))
            if self.return_numeric:
                # Build the finished column first and assign it whole, so the
                # column dtype follows fill_na rather than the int bin codes.
                data[feature] = pd.Series(bins).where(
                    ~missing, self.fill_na).to_numpy()
            else:
                labels = newlabel[feature]
                to_label = np.frompyfunc(
                    lambda code: labels.get(code, self.fill_na), 1, 1)
                data[feature] = to_label(np.where(missing, -1, bins))

    if self.return_array and isinstance(data, (pd.Series, pd.DataFrame)):
        return data.values
    return data
# Comment list
# Table of contents