filter.py 文件源码-python代码片段

filter.py 文件源码

python

阅读 39 收藏 0 点赞 0 评论 0

项目：IgDiscover 作者：NBISweden 项目源码 | 文件源码

def main(args):
    """Filter a tab-separated table chunk by chunk and print the kept rows.

    The table at ``args.table`` is read with pandas in chunks of 10000 rows.
    Every chunk is passed to ``fix_columns`` and then to ``filtered_table``
    together with the thresholds taken from ``args``. The filtered rows are
    printed as tab-separated text, and a summary of the accumulated
    filtering statistics is logged once all chunks have been processed.

    Args:
        args: Object providing the attributes ``table``, ``v_coverage``,
            ``j_coverage`` and ``v_evalue``.
    """
    first = True
    written = 0
    stats = FilteringStatistics()
    for chunk in pd.read_csv(args.table, chunksize=10000, sep='\t'):
        # Return value is ignored, so this presumably adjusts the chunk
        # in place -- NOTE(review): confirm against fix_columns.
        fix_columns(chunk)
        filtered, chunk_stats = filtered_table(
            chunk,
            v_gene_coverage=args.v_coverage,
            j_gene_coverage=args.j_coverage,
            v_gene_evalue=args.v_evalue,
        )
        stats += chunk_stats
        # Emit the column header only with the first chunk so that the
        # concatenated output forms a single table; to_csv already ends
        # each row with a newline, hence end=''.
        print(filtered.to_csv(sep='\t', index=False, header=first), end='')
        first = False
        written += len(filtered)

    logger.info('%s rows in input table', stats.n)
    logger.info('%s rows have both V and J assignment', stats.vjassigned)
    logger.info('%s of those do not have a stop codon', stats.stop)
    logger.info('%s of those have an E-value of at most %s', stats.v_evalue, args.v_evalue)
    logger.info('%s of those cover the V gene by at least %s%%', stats.v_coverage, args.v_coverage)
    logger.info('%s of those cover the J gene by at least %s%%', stats.j_coverage, args.j_coverage)
    logger.info('%d rows written', written)