def filter_using_summary(fq, args):
    """Filter fastq records using quality scores from an albacore summary file.

    Use the summary file from albacore for a more accurate quality estimate
    than the one encoded in the fastq itself. The dataframe obtained from
    nanoget (via process_summary) is converted to a dictionary mapping each
    read ID to its quality.

    Every record whose summary quality is strictly greater than
    args.quality and whose length is strictly greater than args.length is
    sliced with record[args.headcrop:args.tailcrop] and printed to stdout in
    fastq format.

    Arguments:
    fq -- fastq input handed to SeqIO.parse
    args -- object providing the attributes summary, readtype, quality,
            length, headcrop and tailcrop

    Raises SystemExit (after logging an error) when a read in the fastq has
    no entry in the summary file.
    """
    summary = process_summary(
        summaryfile=args.summary,
        threads="NA",
        readtype=args.readtype,
        barcoded=False)
    data = {
        read_id: quality
        for read_id, quality
        in summary[["readIDs", "quals"]].itertuples(index=False)
    }
    for record in SeqIO.parse(fq, "fastq"):
        # Guard only the lookup: a KeyError raised by parsing, slicing or
        # formatting must not be reported as a summary/fastq mismatch.
        try:
            read_quality = data[record.id]
        except KeyError:
            # Adjacent literals are used instead of a backslash continuation
            # so the message does not pick up source indentation.
            logging.error(
                "mismatch between summary and fastq: "
                "%s was not found in the summary file.", record.id)
            sys.exit(
                "\nERROR: mismatch between sequencing_summary and fastq file: "
                "{} was not found in the summary file.\nQuitting.".format(
                    record.id))
        if read_quality > args.quality and len(record) > args.length:
            print(record[args.headcrop:args.tailcrop].format("fastq"), end="")
评论列表
文章目录