TreeExtract.py 文件源码-python代码片段

TreeExtract.py 文件源码

python

阅读 23 收藏 0 点赞 0 评论 0

项目：ModelFlow 作者: yuezPrincetechs 项目源码文件源码

def count_features(self,X,verbose=False):
        '''
        Count, for every sample and every estimator, how often each feature
        appears in the path of the leaf that the sample falls into.

        X: dataframe whose columns must include self.columns.
        verbose: if True, print a progress line after each estimator.

        Returns a list with one dataframe per estimator in self.estimators_.
        Each dataframe has X's index, self.columns as its columns, and int64
        counts as values. Path entries whose feature name is not in
        self.columns are ignored.

        Raises ValueError if self.columns contains duplicate names.

        self.paths[i] is looked up by leaf id; the first item of every entry
        of self.paths[i][leaf] is taken as the feature name (presumably the
        split conditions from the root to that leaf -- confirm where
        self.paths is built).
        '''
        columns=list(self.columns)
        position={name:k for k,name in enumerate(columns)}
        if len(position)!=len(columns):
            raise ValueError('self.columns contains duplicate feature names')
        result=[]
        for i,estimator in enumerate(self.estimators_):
            leaves=pd.Series(estimator.apply(X[self.columns])).tolist()
            leaf_paths=self.paths[i]
            # Many samples share a leaf, so each leaf's count row is built once.
            counts_by_leaf={}
            rows=[]
            for leaf in leaves:
                if leaf not in counts_by_leaf:
                    # Names are matched exactly instead of going through a text
                    # tokenizer, so one-character names and names containing
                    # spaces or punctuation are counted correctly.
                    row=[0]*len(columns)
                    for step in leaf_paths[leaf]:
                        k=position.get(step[0])
                        if k is not None:
                            row[k]+=1
                    counts_by_leaf[leaf]=row
                rows.append(counts_by_leaf[leaf])
            # Passing X.index directly also carries over the index name.
            counts=pd.DataFrame(rows,index=X.index,columns=columns).astype('int64')
            result.append(counts)
            if verbose:
                print('Done:',i)
        return result