Vectorizer.py 文件源码

python
阅读 29 收藏 0 点赞 0 评论 0

项目:kindred 作者: jakelever 项目源码 文件源码
def _vectorize(self,corpus,fit):
        """Build one feature matrix per chosen feature and stack them.

        For every name in ``self.chosenFeatures`` the feature's extraction
        function (``self.featureInfo[name]['func']``) is run on the corpus
        and the result is encoded with a per-feature ``DictVectorizer``.
        When ``self.tfidf`` is set and the feature is not flagged
        ``never_tfidf``, the encoded matrix is additionally passed through
        a per-feature ``TfidfTransformer``.

        Args:
            corpus: The ``kindred.Corpus`` to vectorize (checked by assert).
            fit: If truthy, new vectorizers/transformers are created, stored
                on ``self`` and fitted on this corpus. Otherwise the ones
                already stored on ``self`` are reused.

        Returns:
            The result of ``hstack`` over the per-feature matrices, in the
            order of ``self.chosenFeatures``.

        Notes:
            * While fitting, a feature whose extracted entries are all empty
              gets no vectorizer and contributes no columns.
            * While transforming, a feature with no stored vectorizer is
              skipped.
            * If no feature contributes a matrix, ``hstack`` receives an
              empty list; NOTE(review): confirm callers never hit that case.
        """
        assert isinstance(corpus,kindred.Corpus)

        blocks = []
        for name in self.chosenFeatures:
            assert name in self.featureInfo
            info = self.featureInfo[name]
            extract = info['func']
            skipTfidf = info['never_tfidf']

            data = extract(corpus)
            # Scanned in both modes, exactly like the fitting path needs it.
            hasContent = any(len(entry) > 0 for entry in data)

            if fit:
                if not hasContent:
                    # Nothing to learn a vocabulary from: leave this feature out.
                    continue
                dictVec = DictVectorizer()
                self.dictVectorizers[name] = dictVec
            elif name in self.dictVectorizers:
                dictVec = self.dictVectorizers[name]
            else:
                # Feature had no vectorizer stored, so it has no columns.
                continue

            applyTfidf = self.tfidf and not skipTfidf

            if fit:
                if applyTfidf:
                    self.tfidfTransformers[name] = TfidfTransformer()
                encoded = dictVec.fit_transform(data)
                if applyTfidf:
                    encoded = self.tfidfTransformers[name].fit_transform(encoded)
            else:
                encoded = dictVec.transform(data)
                if applyTfidf:
                    encoded = self.tfidfTransformers[name].transform(encoded)

            blocks.append(encoded)

        return hstack(blocks)
评论列表
文章目录


问题


面经


文章

微信
公众号

扫码关注公众号