def parsing_ORF_count():
    """Parse and validate the command-line options for ORF read counting.

    Options are read from ``sys.argv`` via ``argparse``.

    Returns:
        argparse.Namespace: the parsed options. ``rpf_mapping_file`` is
        always a list of file names (the ``-r`` value split on ","), even
        when a single file was given.

    Raises:
        ValueError: if the GTF file does not exist, if ``exclude_min_ORF``
            is not larger than the number of nucleotides covered by the
            excluded first and last codons, or if ``-r`` contains no file
            name at all.
    """
    parser = argparse.ArgumentParser(
        description=(
            "This script is designed for calculating the number of reads "
            "mapping to ORF with the alignment files in SAM/BAM format "
            "(aligned to genome) and a feature file in GTF format"
        )
    )
    parser.add_argument(
        "-s", "--stranded", dest="stranded", required=False, type=str,
        choices=["yes", "reverse"], default="yes",
        help=(
            "whether the data is strand-specific, "
            "reverse means reversed strand interpretation. (default: yes)"
        ),
    )
    parser.add_argument(
        "-a", "--minaqual", dest="min_quality", required=False, type=int,
        default=10,
        help="skip all reads with alignment quality lower than the given minimum value (default:10)",
    )
    parser.add_argument(
        "-c", "--count_mode", dest="count_mode", required=False, type=str,
        choices=["union", "intersection-strict"],
        default="intersection-strict",
        help=(
            "mode to handle reads overlapping more than one ORF (choices:"
            "union,intersection-strict;default: intersection-strict)"
        ),
    )
    parser.add_argument(
        "-g", "--gtf", dest="gtf_file", required=False, type=str,
        default="final_result_collapsed.gtf",
        # The help text now names the real default file.
        help="ORF gtf file generated by RiboCode, default:final_result_collapsed.gtf",
    )
    parser.add_argument(
        "-r", "--rpf_mapping_file", dest="rpf_mapping_file", required=True,
        type=str,
        help='ribo-seq BAM/SAM file aligned to the genome, multiple files should be separated with ","',
    )
    parser.add_argument(
        "-f", "--first_exclude_codons", dest="first_exclude_codons",
        required=False, type=int, default=15,
        help="excluding the reads aligned to the first few codons of the ORF, default:15",
    )
    parser.add_argument(
        "-l", "--last_exclude_codons", dest="last_exclude_codons",
        required=False, type=int, default=5,
        help="excluding the reads aligned to the last few codons of the ORF, default:5",
    )
    parser.add_argument(
        "-e", "--exclude_min_ORF", dest="exclude_min_ORF", required=False,
        type=int, default=100,
        help="the minimal length(nt) of ORF for excluding the reads aligned to first and last few codons, default:100",
    )
    parser.add_argument(
        "-m", "--min_read", dest="min_read", required=False, type=int,
        default=26,
        help="minimal read length for the counting of RPF,default:26",
    )
    parser.add_argument(
        "-M", "--max_read", dest="max_read", required=False, type=int,
        default=34,
        help="maximal read length for the counting of RPF,default:34",
    )
    # NOTE: a "-p/--parallel_num" option (threads for reading the alignment
    # files) used to be sketched here but is currently disabled.
    parser.add_argument(
        "-o", "--output", dest="output_file", required=False, type=str,
        default="-",
        help="write out all ORF counts into a file, default is to write to standard output",
    )
    parser.add_argument("-V", "--version", action="version", version=__version__)
    args = parser.parse_args()

    if not os.path.exists(args.gtf_file):
        raise ValueError("Error, the gtf file not found: {}".format(args.gtf_file))

    # Each codon is 3 nt; the excluded head and tail together must be
    # strictly shorter than the ORF length threshold.
    excluded_nt = 3 * (args.first_exclude_codons + args.last_exclude_codons)
    if excluded_nt >= args.exclude_min_ORF:
        raise ValueError("Error, the exclude_min_ORF is too small: %i" % args.exclude_min_ORF)

    # Always hand back a list. Names are stripped on every path, and empty
    # entries produced by trailing or doubled commas are dropped.
    raw_value = args.rpf_mapping_file
    mapping_files = [name.strip() for name in raw_value.split(",")]
    mapping_files = [name for name in mapping_files if name]
    if not mapping_files:
        raise ValueError("Error, no ribo-seq mapping file given: {!r}".format(raw_value))
    args.rpf_mapping_file = mapping_files
    return args
# Web-page residue (not part of the script): "comment list" / "table of contents".