def save_as_hdf5_acc(g, outHDF5):
    """Write a genotype object to an HDF5 file, chunked per accession.

    The output file contains three datasets:

    * ``accessions`` -- ``g.accessions``, shape ``(NumAcc,)``.
    * ``positions``  -- ``g.positions`` as ``i4``, shape ``(NumSNPs,)``,
      with ``g.chrs`` and ``g.chr_regions`` stored as attributes
      ``chrs`` and ``chr_regions``.
    * ``snps``       -- ``int8`` matrix of shape ``(NumSNPs, NumAcc)``,
      gzip-compressed, stored in chunks of ``(NumSNPs, 1)`` so that each
      chunk holds one accession column. Carries the attributes
      ``data_format``, ``num_snps`` and ``num_accessions``.

    Args:
        g: Object exposing ``accessions``, ``positions``, ``chrs``,
            ``chr_regions``, ``snps`` and ``data_format``. ``g.snps`` is
            indexed as ``[snp, accession]``.
        outHDF5: Path of the HDF5 file to create; it is opened in ``'w'``
            mode.
    """
    NumAcc = len(g.accessions)
    NumSNPs = len(g.snps)
    log.info("Writing into HDF5 file acc wise")
    # Convert once up front; the original re-converted the whole matrix on
    # every loop iteration.
    snps = np.asarray(g.snps)
    # The context manager guarantees the file is closed even when a write
    # fails part-way through.
    with h5py.File(outHDF5, 'w') as h5file:
        h5file.create_dataset('accessions', data=g.accessions, shape=(NumAcc,))
        positions = h5file.create_dataset(
            'positions', data=g.positions, shape=(NumSNPs,), dtype='i4'
        )
        positions.attrs['chrs'] = g.chrs
        positions.attrs['chr_regions'] = g.chr_regions
        snps_dset = h5file.create_dataset(
            'snps',
            shape=(NumSNPs, NumAcc),
            dtype='int8',
            compression="gzip",
            chunks=(NumSNPs, 1),
        )
        # Write one accession column at a time, matching the chunk layout.
        for i in range(NumAcc):
            snps_dset[:, i] = snps[:, i]
            # Parentheses are required: ``i+1 % 10`` parses as
            # ``i + (1 % 10)``, which made this progress message unreachable.
            if (i + 1) % 10 == 0:
                log.info("written SNP info for %s accessions", i+1)
        snps_dset.attrs['data_format'] = g.data_format
        snps_dset.attrs['num_snps'] = NumSNPs
        snps_dset.attrs['num_accessions'] = NumAcc
评论列表
文章目录