def from_orfs_files(cls, seq_path, paths_path, graph_path):
    """
    Create object from graph, ORFs files

    The paths file is consumed as consecutive line pairs: a header line
    containing an ``ORF_<digits>`` identifier, followed by a path line of the
    form ``<int>,<int><+|->,...,<int>``. Each path is stored as
    ``[leading_int, (edge_id_str, sign), ..., trailing_int]``.
    NOTE(review): the leading/trailing integers look like start/end offsets
    within the first/last edge -- confirm against the file producer.

    A pair whose header line has no ``ORF_<digits>`` identifier is skipped
    entirely, so its path can never be attached to another ORF.

    :param seq_path: path to .fasta file with ORFs sequences
    :param paths_path: path to .path file with ORF's paths
    :param graph_path: path to graph
    :return: instance of ``cls`` (LinkageCluster) built from the loaded graph
        and an ordered mapping ``sequence -> (path, record id)``
    :raises IndexError: if a path line after a valid header lacks the leading
        ``<int>,`` or the trailing ``,<int>``
    :raises KeyError: if a FASTA record id has no parsed path
    """
    from Bio import SeqIO

    # Compile once; raw strings avoid the invalid-escape warning for "\d".
    orf_id_re = re.compile(r'(ORF_\d+)')
    start_re = re.compile(r'^(\d+),')
    edge_re = re.compile(r'(\d+)([+-]),')
    end_re = re.compile(r',(\d+)$')

    orfs = collections.OrderedDict()
    with open(seq_path) as seq_file, open(paths_path) as path_file:
        paths_dict = {}
        orfid = None
        # Stream the file line by line instead of loading it whole.
        for ind, raw_line in enumerate(path_file):
            line = raw_line.rstrip('\n')
            if ind % 2 == 0:
                # Header line: remember the ORF id, or None when it is absent
                # so the following path line is not filed under a stale id.
                header = orf_id_re.search(line)
                orfid = header.group(1) if header else None
                continue
            if orfid is None:
                continue
            paths_dict[orfid] = (
                [int(start_re.findall(line)[0])]
                + edge_re.findall(line)
                + [int(end_re.findall(line)[0])]
            )
        # Keyed by sequence string: records with identical sequences
        # overwrite each other, the last one wins.
        for rec in SeqIO.parse(seq_file, 'fasta'):
            orfs[str(rec.seq)] = (paths_dict[rec.id], rec.id)
    return cls(GFAgraph().load_graph(graph_path), orfs)
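# Comment list (web-page navigation residue, not part of the code)
# Table of contents (web-page navigation residue, not part of the code)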