def load_reference(self, path, fmts, metadata, include=2, genes=False):
    """Load a GenBank reference and optionally add it to the sequence pool.

    The record read from ``path`` is stored on ``self.reference`` and is
    given ``genes``, ``attributes``, ``name``, ``id`` and ``include``
    attributes. Problems are reported through ``self.log.fatal``.

    Args:
        path: Location of the reference file; it is parsed as GenBank.
        fmts: Forwarded unchanged to ``self._parse_date_per_seq`` when the
            reference is added to ``self.seqs``.
        metadata: Dict describing the reference. Must contain ``"strain"``,
            and also ``"date"`` when ``include > 0``. Only keys that are
            attributes of the already-loaded sequences are kept.
        include: Flag in {0, 1, 2}. With ``0`` a reference already present
            in ``self.seqs`` is removed; with a value above ``0`` a
            reference that is not yet present is added.
        genes: Falsy to record no genes. Otherwise a container of gene
            names: only features whose ``gene`` qualifier is in it are
            kept, keyed by ``sequence_set.get_gene_name(name, genes)``.
    """
    # Parsing can fail in many ways (missing file, malformed record, ...),
    # so every exception is funnelled into one fatal log message.
    try:
        self.reference = SeqIO.read(path, 'genbank')
    except Exception as e:
        self.log.fatal("Problem reading reference {}. Error: {}".format(path, e))

    # Validate with explicit checks rather than ``assert``: assertions are
    # stripped under ``python -O`` and carry no message to report.
    required_keys = ["strain"]
    if include > 0:
        required_keys.append("date")
    missing_keys = [key for key in required_keys if key not in metadata]
    if missing_keys:
        self.log.fatal(
            "Poorly defined reference. Error: missing metadata key(s) {}".format(
                ", ".join(missing_keys)
            )
        )

    if genes:
        # we used to make these FeatureLocation objects here, but that won't go to JSON
        # so just do it in the Process part instead. For reference:
        # FeatureLocation(start=f.location.start, end=f.location.end, strand=1)
        self.reference.genes = {
            sequence_set.get_gene_name(f.qualifiers['gene'][0], genes): {
                "start": int(f.location.start),
                "end": int(f.location.end),
                "strand": 1,
            }
            for f in self.reference.features
            if 'gene' in f.qualifiers and f.qualifiers['gene'][0] in genes
        }
    else:
        self.reference.genes = {}

    # Use the supplied metadata dict to define attributes, restricted to the
    # attribute names of one already-loaded sequence. ``next(iter(...))``
    # works on Python 2 and 3, whereas ``values()[0]`` fails on Python 3.
    seq_attr_keys = set(next(iter(self.seqs.values())).attributes.keys())
    self.reference.attributes = {
        k: fix_names(v) for k, v in metadata.items() if k in seq_attr_keys
    }
    # NOTE(review): this lookup needs "strain" to be one of seq_attr_keys,
    # otherwise it raises KeyError -- confirm the loaded sequences carry it.
    self.reference.name = self.reference.attributes["strain"]
    self.reference.id = self.reference.attributes["strain"]

    # is there any possibility that the reference will be added to the sequences?
    self.reference.include = include  # flag {0,1,2}
    # NOTE(review): the add branch runs only when the reference is not
    # already in self.seqs -- confirm this nesting matches the intent.
    if self.reference.name in self.seqs:
        self.log.notify("Segment {} reference already in dataset".format(self.segmentName))
        if include == 0:
            self.log.notify("Removing reference from pool of sequences to analyse")
            del self.seqs[self.reference.name]
    elif include > 0:
        # add to sequences (tidy up attributes first)
        self._parse_date_per_seq(self.reference, fmts)
        self.seqs[self.reference.name] = self.reference
        # "date" and "num_date" are left out of the missing-attribute report.
        missing_attrs = seq_attr_keys - set(self.reference.attributes) - {"date", "num_date"}
        if missing_attrs:
            # Sorted so the log message is deterministic.
            self.log.notify(
                "Including reference in segment {} but the following attributes are missing: {}".format(
                    self.segmentName, " & ".join(sorted(missing_attrs))
                )
            )
评论列表
文章目录