chemTopicModel.py 文件源码-python代码片段

chemTopicModel.py 文件源码

python

阅读 79 收藏 0 点赞 0 评论 0

def _generateFragments(self):
        """Compute per-molecule fragment data in parallel and store it in ``self.moldata['fps']``.

        The rows of ``self.moldata`` are processed in rounds. In each round the
        row range ``[counter, nextChunk)`` is split by
        ``self._produceDataChunks`` and every chunk is handed to
        ``_generateMolFrags`` through a reusable ``Parallel`` worker pool.
        Each worker result is merged into a dict keyed by index label
        (assumes ``_generateMolFrags`` returns a mapping
        ``{index label: fragment data}`` -- TODO confirm against the helper).

        Rows for which no worker returned an entry keep an empty dict.

        Raises:
            ValueError: if ``self.chunksize`` is smaller than 1, because the
                dispatch loop could then never advance.
        """
        voc = set(self.vocabulary)
        # One independent empty dict per row (dict.fromkeys would share one).
        fpsdict = {idx: {} for idx in self.moldata.index}
        nrows = self.moldata.shape[0]

        if self.chunksize < 1:
            raise ValueError('chunksize must be a positive integer')
        # n_jobs can be negative (joblib uses e.g. -1 for "all cores"); a
        # negative multiplier would make the range empty and stall the loop,
        # so dispatch at least one chunk per round.
        step = max(self.n_jobs, 1) * self.chunksize

        counter = 0
        with Parallel(n_jobs=self.n_jobs, verbose=self.verbose) as parallel:
            while counter < nrows:
                nextChunk = min(counter + step, nrows)
                result = parallel(delayed(_generateMolFrags)(mollist, voc,
                                                             self.fragmentMethod,
                                                             self.fragIdx)
                                  for mollist in self._produceDataChunks(counter, nextChunk, self.chunksize))
                for r in result:
                    fpsdict.update(r)
                # Advance by the range that was dispatched, not by the number
                # of entries returned: a worker that yields fewer entries than
                # it was given must not cause re-processing or an endless loop.
                counter = nextChunk

        # Look the results up in the frame's own row order. Sorting the dict
        # items and assigning positionally misaligns rows when the index is
        # not sorted, and fails outright on an empty frame.
        self.moldata['fps'] = [fpsdict[idx] for idx in self.moldata.index]

    # construct the molecule-fragment matrix as input for the LDA algorithm