def _generateFragments(self):
    """Generate the fragments of every molecule in parallel.

    Rows of ``self.moldata`` are dispatched to ``_generateMolFrags`` in
    batches of up to ``n_jobs * chunksize`` rows; each batch is split into
    chunks by ``self._produceDataChunks``.  Worker results are merged into
    a dict that starts with an empty dict for every index label, so labels
    a worker does not report keep ``{}``.

    The merged values are written to ``self.moldata['fps']`` in the row
    order of ``self.moldata``.

    Returns:
        None.  ``self.moldata['fps']`` is created or overwritten in place.

    Raises:
        ValueError: if there are rows to process but
            ``n_jobs * chunksize`` is not positive (the batching loop
            could never make progress).
    """
    voc = set(self.vocabulary)
    fpsdict = {idx: {} for idx in self.moldata.index}
    nrows = self.moldata.shape[0]

    batch_size = self.n_jobs * self.chunksize
    if nrows and batch_size <= 0:
        # A non-positive batch (e.g. n_jobs=-1) yields an empty row range on
        # every pass, which would spin the loop below forever.
        raise ValueError(
            "n_jobs * chunksize must be positive, got %r" % (batch_size,))

    counter = 0
    with Parallel(n_jobs=self.n_jobs, verbose=self.verbose) as parallel:
        while counter < nrows:
            nextChunk = min(counter + batch_size, nrows)
            result = parallel(
                delayed(_generateMolFrags)(
                    mollist, voc, self.fragmentMethod, self.fragIdx)
                for mollist in self._produceDataChunks(
                    counter, nextChunk, self.chunksize))
            for r in result:
                fpsdict.update(r)
            # Advance by the range that was dispatched, not by the number of
            # entries the workers returned: a worker that reports fewer
            # entries than it received must not cause rows to be processed
            # again or stall the loop.
            # NOTE(review): assumes _produceDataChunks(start, end, size)
            # covers rows [start, end) -- confirm against its definition.
            counter = nextChunk

    # Column assignment from an array is positional, so build the values in
    # moldata's own row order instead of sorted-label order.  Filling a
    # pre-allocated object array also avoids NumPy shape inference on the
    # (label, value) pairs.
    fps = np.empty(nrows, dtype=object)
    for pos, idx in enumerate(self.moldata.index):
        fps[pos] = fpsdict[idx]
    self.moldata['fps'] = fps
# construct the molecule-fragment matrix as input for the LDA algorithm
评论列表
文章目录