sequences_prepare.py 文件源码

python
阅读 17 收藏 0 点赞 0 评论 0

项目:augur 作者: nextstrain 项目源码 文件源码
def load_reference(self, path, fmts, metadata, include=2, genes=False):
    """Load a GenBank reference and optionally add it to ``self.seqs``.

    The record read from ``path`` is stored on ``self.reference`` and is
    given ``genes``, ``attributes``, ``name``, ``id`` and ``include``
    attributes.

    Args:
        path: Location of the reference; handed to ``SeqIO.read`` with the
            ``'genbank'`` format.
        fmts: Passed through unchanged to ``self._parse_date_per_seq`` when
            the reference is added to the sequences.
        metadata: Dict describing the reference. Must contain ``"strain"``
            and, when ``include > 0``, ``"date"``. Only keys that also occur
            in the attributes of an already loaded sequence are copied
            (each value goes through ``fix_names``).
        include: Flag in {0, 1, 2}. With 0 the reference is removed from
            ``self.seqs`` if it is already there; with a value > 0 it is
            added when it is not already there. This method does not
            distinguish between 1 and 2, it only stores the flag on
            ``self.reference.include``.
        genes: Falsy to record no genes. Otherwise a container of gene
            names; features whose first ``gene`` qualifier is in it are
            recorded, keyed by ``sequence_set.get_gene_name(...)``.

    Problems are reported via ``self.log.fatal``. This method neither raises
    nor returns after reporting, so it relies on ``fatal`` aborting
    execution (NOTE(review): presumably it exits -- confirm in the logger).
    """
    try:
        self.reference = SeqIO.read(path, 'genbank')
    except Exception as e:
        self.log.fatal("Problem reading reference {}. Error: {}".format(path, e))

    # Explicit membership checks instead of ``assert``: asserts vanish under
    # ``python -O`` and a bare assert carries no message to log.
    required_fields = ["strain"]
    if include > 0:
        required_fields.append("date")
    missing_fields = [key for key in required_fields if key not in metadata]
    if missing_fields:
        self.log.fatal(
            "Poorly defined reference. Error: missing metadata field(s): {}".format(
                ", ".join(missing_fields)
            )
        )

    if genes:
        # Plain ints are stored rather than FeatureLocation objects because
        # the latter cannot be written to JSON; the FeatureLocation is
        # rebuilt in the Process step instead. For reference:
        # FeatureLocation(start=f.location.start, end=f.location.end, strand=1)
        self.reference.genes = {
            sequence_set.get_gene_name(f.qualifiers['gene'][0], genes): {
                "start": int(f.location.start),
                "end": int(f.location.end),
                "strand": 1,
            }
            for f in self.reference.features
            if 'gene' in f.qualifiers and f.qualifiers['gene'][0] in genes
        }
    else:
        self.reference.genes = {}

    # Use the supplied metadata dict to define attributes, restricted to the
    # attribute keys carried by an (arbitrary) already loaded sequence.
    if not self.seqs:
        self.log.fatal("Cannot define reference attributes: no sequences have been loaded")
    # ``dict.values()`` is a non-subscriptable view on Python 3, so take the
    # first element through an iterator (also valid on Python 2).
    first_seq = next(iter(self.seqs.values()))
    seq_attr_keys = set(first_seq.attributes.keys())
    self.reference.attributes = {
        k: fix_names(v) for k, v in metadata.items() if k in seq_attr_keys
    }
    self.reference.name = self.reference.attributes["strain"]
    self.reference.id = self.reference.attributes["strain"]

    # Is there any possibility that the reference will be added to the sequences?
    self.reference.include = include  # flag {0,1,2}
    if self.reference.name in self.seqs:
        # The existing entry is kept as-is unless the caller asked for the
        # reference to be excluded.
        self.log.notify("Segment {} reference already in dataset".format(self.segmentName))
        if include == 0:
            self.log.notify("Removing reference from pool of sequences to analyse")
            del self.seqs[self.reference.name]
    elif include > 0:
        # Add to sequences (tidy up attributes first).
        self._parse_date_per_seq(self.reference, fmts)
        self.seqs[self.reference.name] = self.reference
        # "date" and "num_date" are excluded from the missing-attribute report.
        missing_attrs = seq_attr_keys - set(self.reference.attributes.keys()) - {"date", "num_date"}
        if missing_attrs:
            # Sorted so the log message is deterministic.
            self.log.notify("Including reference in segment {} but the following attributes are missing: {}".format(self.segmentName, " & ".join(sorted(missing_attrs))))
评论列表
文章目录


问题


面经


文章

微信
公众号

扫码关注公众号