def main(args):
    """Filter a tab-separated table chunk by chunk and print the kept rows.

    The table at ``args.table`` is read in chunks of 10000 rows rather than
    all at once. Each chunk is passed through ``filtered_table`` with the
    thresholds ``args.v_coverage``, ``args.j_coverage`` and ``args.v_evalue``.
    The rows it returns are printed to standard output as tab-separated text,
    with the column header emitted only for the first chunk. The per-chunk
    statistics are summed and reported through ``logger`` once all chunks
    have been processed.
    """
    stats = FilteringStatistics()
    written = 0
    chunks = pd.read_csv(args.table, chunksize=10000, sep='\t')
    for index, chunk in enumerate(chunks):
        # The return value is not used; presumably this adjusts the chunk
        # in place -- TODO confirm against fix_columns.
        fix_columns(chunk)
        filtered, chunk_stats = filtered_table(
            chunk,
            v_gene_coverage=args.v_coverage,
            j_gene_coverage=args.j_coverage,
            v_gene_evalue=args.v_evalue,
        )
        stats += chunk_stats
        # to_csv() already terminates every row with a newline, so suppress
        # the one print() would add. Only the first chunk carries the header.
        print(filtered.to_csv(sep='\t', index=False, header=(index == 0)), end='')
        written += len(filtered)

    logger.info('%s rows in input table', stats.n)
    logger.info('%s rows have both V and J assignment', stats.vjassigned)
    logger.info('%s of those do not have a stop codon', stats.stop)
    logger.info('%s of those have an E-value of at most %s', stats.v_evalue, args.v_evalue)
    logger.info('%s of those cover the V gene by at least %s%%', stats.v_coverage, args.v_coverage)
    logger.info('%s of those cover the J gene by at least %s%%', stats.j_coverage, args.j_coverage)
    logger.info('%d rows written', written)
# Web-page residue, not code: "comment list"
# Web-page residue, not code: "table of contents"