def count_features(self, X, verbose=False):
    '''
    Count, per estimator, how often each feature occurs on each row's leaf path.

    For every estimator in ``self.estimators_`` the rows of ``X`` are mapped
    to leaf identifiers with ``estimator.apply``.  The steps stored in
    ``self.paths[i][leaf]`` are then scanned and the first element of every
    step (a feature name) is tallied against ``self.columns``.

    Parameters
    ----------
    X : pandas.DataFrame
        Input data; its columns must include every name in ``self.columns``.
    verbose : bool, default False
        When true, print ``Done: <i>`` after each estimator is processed.

    Returns
    -------
    list of pandas.DataFrame
        One frame per estimator, in the order of ``self.estimators_``.  Each
        frame shares ``X``'s index, has ``self.columns`` as columns and holds
        int64 occurrence counts.

    Notes
    -----
    Feature names are matched exactly.  The previous implementation joined
    the names into a string and re-tokenised it with ``CountVectorizer``,
    whose default token pattern drops one-character tokens and splits on
    punctuation/whitespace, so such column names were silently counted as 0.
    Step names that are not in ``self.columns`` are ignored, as before.

    NOTE(review): assumes ``estimator.apply`` yields one hashable leaf id per
    row and that ``self.paths[i][leaf]`` lists the steps leading to that
    leaf -- confirm against the code that builds ``self.paths``.
    '''
    columns = list(self.columns)
    column_position = {name: pos for pos, name in enumerate(columns)}
    # The feature view does not depend on the estimator; select it once.
    features = X[self.columns]

    result = []
    for i, estimator in enumerate(self.estimators_):
        leaf_paths = self.paths[i]
        # Many rows end in the same leaf, so tally each distinct leaf once.
        counts_by_leaf = {}
        rows = []
        for leaf in estimator.apply(features):
            row = counts_by_leaf.get(leaf)
            if row is None:
                row = [0] * len(columns)
                for step in leaf_paths[leaf]:
                    pos = column_position.get(step[0])
                    if pos is not None:
                        row[pos] += 1
                counts_by_leaf[leaf] = row
            rows.append(row)

        counts = pd.DataFrame(rows, index=X.index, columns=columns, dtype='int64')
        result.append(counts)
        if verbose:
            print('Done:', i)
    return result
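# Comment list (web-page navigation text; not part of the code)
# Article table of contents (web-page navigation text; not part of the code)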