def _load_strain_sequences(self, strain_gempro):
    """Load strain sequences into the reference model for comparisons, and into the strain-specific model.

    The sequence source is selected by ``self._orthology_matrix_has_sequences``:

    * if set, the column of ``self.df_orthology_matrix`` named ``strain_gempro.id`` is converted to a
      dictionary and indexed by the gene's own ID;
    * otherwise the matrix cell at (gene ID, strain ID) is used as the key into the FASTA file at
      ``strain_gempro.genome_path``, which is opened with ``SeqIO.index``.

    For every gene in ``strain_gempro.genes`` that is marked ``functional``, the sequence is stored under
    the ID ``<gene id>_<strain id>``: as a non-representative sequence on the matching gene of
    ``self.reference_gempro`` and as the representative sequence on the strain gene.

    Args:
        strain_gempro: Strain-specific model. This method reads its ``id``, ``genome_path`` and ``genes``.

    """
    # Only set when a FASTA index is opened, so the cleanup below knows whether there is a handle to close
    fasta_index = None

    if self._orthology_matrix_has_sequences:
        # Load directly from the orthology matrix if it contains sequences
        strain_sequences = self.df_orthology_matrix[strain_gempro.id].to_dict()
    else:
        # Otherwise load from the genome file if the orthology matrix contains gene IDs
        log.debug('{}: loading strain genome CDS file'.format(strain_gempro.genome_path))
        fasta_index = SeqIO.index(strain_gempro.genome_path, 'fasta')
        strain_sequences = fasta_index

    try:
        for strain_gene in strain_gempro.genes:
            if not strain_gene.functional:
                continue

            if self._orthology_matrix_has_sequences:
                strain_gene_key = strain_gene.id
            else:
                # Pull the gene ID of the strain from the orthology matrix
                strain_gene_key = self.df_orthology_matrix.loc[strain_gene.id, strain_gempro.id]
                log.debug('{}: original gene ID to be pulled from strain fasta file'.format(strain_gene_key))

            # Load into the reference model for comparisons
            ref_gene = self.reference_gempro.genes.get_by_id(strain_gene.id)
            new_id = '{}_{}'.format(strain_gene.id, strain_gempro.id)
            if ref_gene.protein.sequences.has_id(new_id):
                # NOTE(review): this also skips the load into the strain model below -- confirm that a
                # sequence present in the reference model implies it is present in the strain model too
                log.debug('{}: sequence already loaded into reference model'.format(new_id))
                continue
            ref_gene.protein.load_manual_sequence(seq=strain_sequences[strain_gene_key], ident=new_id,
                                                  set_as_representative=False)
            log.debug('{}: loaded sequence into reference model'.format(new_id))

            # Load into the strain GEM-PRO
            strain_gene.protein.load_manual_sequence(seq=strain_sequences[strain_gene_key], ident=new_id,
                                                     set_as_representative=True)
            log.debug('{}: loaded sequence into strain model'.format(new_id))
    finally:
        # SeqIO.index keeps the FASTA file open for on-demand record lookups; release the handle once
        # all genes have been processed, even if a lookup or load raised
        if fasta_index is not None:
            fasta_index.close()
评论列表
文章目录