def __init__(self, logger, sequences, reference, dateFormat):
    """Populate the sequence set from parsed JSON input and a reference.

    Parameters
    ----------
    logger
        Stored unchanged on ``self.log``.
    sequences
        Mapping of sequence name -> dict with a ``"seq"`` string and an
        ``"attributes"`` dict. Each attributes dict must contain
        ``"raw_date"``. The attributes dict is stored by reference (not
        copied), so the ``"num_date"`` / ``"date"`` keys written here are
        also visible through the caller's ``sequences`` object.
    reference
        Dict with keys ``"included"``, ``"strain"`` and ``"seq"``, and an
        optional ``"genes"`` mapping of gene name -> dict with ``"start"``,
        ``"end"`` and ``"strand"``.
    dateFormat
        Passed straight through to ``parse_date`` together with each
        ``"raw_date"`` value.

    Raises
    ------
    KeyError
        If any of the required keys listed above is missing.
    """
    super(sequence_set, self).__init__()
    self.log = logger

    # load sequences from the (parsed) JSON - don't forget to sort out dates
    self.seqs = {}
    # .items() instead of .iteritems(): the latter does not exist on
    # Python 3 dicts, while .items() works on both Python 2 and 3.
    for name, data in sequences.items():
        record = SeqRecord(Seq(data["seq"], generic_dna),
                           id=name, name=name, description=name)
        self.seqs[name] = record
        record.attributes = data["attributes"]
        # tidy up dates: elements 1 and 2 of parse_date's result are kept
        # (presumably numeric date and date object - confirm in parse_date)
        date_struc = parse_date(record.attributes["raw_date"], dateFormat)
        record.attributes["num_date"] = date_struc[1]
        record.attributes["date"] = date_struc[2]

    # if the reference is to be analysed it'll already be in the (filtered & subsampled)
    # sequences, so no need to add it here, and no need to care about attributes etc
    # we do, however, need it for alignment
    self.reference_in_dataset = reference["included"]
    ref_name = reference["strain"]
    self.reference_seq = SeqRecord(Seq(reference["seq"], generic_dna),
                                   id=ref_name, name=ref_name,
                                   description=ref_name)

    # A missing, empty or null "genes" entry all mean "no protein annotations".
    genes = reference.get("genes")
    if genes:
        self.proteins = {
            gene: FeatureLocation(start=loc["start"], end=loc["end"],
                                  strand=loc["strand"])
            for gene, loc in genes.items()
        }
    else:
        self.proteins = None

    # other things:
    # run_dir looks like temp_<UTC timestamp>_<random int in [0, 1000000]>
    timestamp = time.strftime('%Y%m%d-%H%M%S', time.gmtime())
    self.run_dir = '_'.join(['temp', timestamp, str(random.randint(0, 1000000))])
    self.nthreads = 2  # should load from config file
评论列表
文章目录