def _split_gff_by_seqid(gff, seqids, out_dir):
    """Write one ``<seqid>.gff`` file per wanted sequence id into `out_dir`.

    The GFF is read once.  A line is assigned to a sequence id when its
    first tab-separated column equals that id exactly, or when it is a
    ``##sequence-region <seqid> ...`` directive for that id.  Every other
    comment/directive line, and every line belonging to an id not in
    `seqids`, is dropped.

    * gff: path of the GFF file to split.
    * seqids: set of sequence ids to keep.
    * out_dir: existing (ideally empty) directory receiving the files.
    """
    import itertools

    def seqid_of(line):
        if line.startswith('##sequence-region'):
            fields = line.split()
            return fields[1] if len(fields) > 1 else None
        if line.startswith('#'):
            return None
        return line.split('\t', 1)[0]

    with open(gff) as src:
        # groupby yields contiguous runs of lines sharing a sequence id, so
        # only one output file is open at a time; append mode copes with an
        # id whose lines are not contiguous in the input.
        for seqid, lines in itertools.groupby(src, key=seqid_of):
            if seqid not in seqids:
                continue
            with open(os.path.join(out_dir, seqid + '.gff'), 'a') as dst:
                dst.writelines(lines)


def fa2embl(fa, embl, gff, path,
            seqret='/home/shangzhong/Installation/EMBOSS-6.6.0/bin/seqret'):
    """Convert a FASTA + GFF annotation into one merged EMBL file.

    For every sequence id that has at least one ``gene`` feature in `gff`,
    the sequence is extracted from `fa`, combined with its GFF features by
    EMBOSS ``seqret`` into ``<seqid>.embl``, and finally all per-sequence
    EMBL files produced by this call are concatenated into `embl`.

    * fa: FASTA file holding the sequences.
    * embl: name of the merged EMBL output file.
    * gff: GFF annotation file; column 1 is the sequence id, column 3 the
      feature type.
    * path: working directory.  It is created if missing and becomes the
      current working directory for the rest of the process, so relative
      `fa`, `gff` and `embl` names are resolved against it.
    * seqret: path of the EMBOSS ``seqret`` executable.

    Raises KeyError (from the FASTA index) when a sequence id found in the
    GFF is absent from `fa`.

    Side effects: leaves a scratch file ``fa`` and the per-sequence
    ``<seqid>.embl`` files in `path`.
    """
    import shutil
    import tempfile

    # makedirs also creates missing parent directories.
    if not os.path.isdir(path):
        os.makedirs(path)
    os.chdir(path)

    # dtype=str keeps purely numeric sequence names ("1", "2", ...) as
    # strings so they still match the keys of the FASTA index.
    df = pd.read_csv(gff, sep='\t', header=None, comment='#',
                     usecols=[0, 2], dtype=str)
    # Sorted so that processing and concatenation order are reproducible.
    chroms = sorted(set(df.loc[df[2] == 'gene', 0].dropna()))

    parts = []
    # Scratch directory for the per-sequence GFF slices; a relative name is
    # used so the seqret command line does not depend on the absolute path.
    gff_dir = os.path.relpath(tempfile.mkdtemp(prefix='gff_parts_', dir='.'))
    dic = SeqIO.index(fa, 'fasta')
    try:
        # Exact match on the sequence-id column: a substring/regex match
        # would, for example, mix features of "chr10" into "chr1".
        _split_gff_by_seqid(gff, set(chroms), gff_dir)
        for s in chroms:
            # Close (and thereby flush) the FASTA before seqret reads it.
            with open('fa', 'w') as handle:
                SeqIO.write(dic[s], handle, 'fasta')
            part = '{s}.embl'.format(s=s)
            sarge.run('{seqret} -sequence fa -feature -fformat gff '
                      '-fopenfile1 {part_gff} -osformat2 embl '
                      '-auto -outseq {part}'.format(
                          seqret=seqret,
                          part_gff=os.path.join(gff_dir, s + '.gff'),
                          part=part))
            parts.append(part)
    finally:
        dic.close()
        shutil.rmtree(gff_dir, ignore_errors=True)

    # Merge only the files belonging to this run.  Globbing '*.embl' would
    # also pick up stale files and, when `embl` lives in `path`, the merged
    # output itself.
    with open(embl, 'wb') as merged:
        for part in parts:
            if not os.path.exists(part):
                # seqret produced nothing for this sequence; skip it.
                continue
            with open(part, 'rb') as src:
                shutil.copyfileobj(src, merged)
    # The per-sequence .embl files are deliberately kept; uncomment to
    # remove them after merging.
    # for f in parts:
    #     os.remove(f)
# fa2embl('/data/genome/hamster/ncbi_refseq/hamster.fa','hamster.embl','/data/genome/hamster/ncbi_refseq/hamster.gff','/data/shangzhong/Picr_assembly/Annotation/RATT/embl')
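# 评论列表 (comment list: web-page residue, not part of the script)
# 文章目录 (table of contents: web-page residue, not part of the script)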