def load_dataframe(db_file, dialect='excel-tab'):
    """Load a delimited database file and add derived gene/junction columns.

    The file is read with ``pd.read_csv`` and every column name is
    lower-cased. After lower-casing, the table must provide ``v_call``,
    ``j_call`` and ``junction`` columns. The following columns are added:

    * ``v_gene_set`` -- ``set(parseAllele(v_call, gene_regex, 'set'))``
    * ``v_gene_set_str`` -- ``str()`` of the corresponding ``v_gene_set``
    * ``j_gene_set`` -- ``set(parseAllele(j_call, gene_regex, 'set'))``
    * ``junc`` -- ``junction_re(junction)``
    * ``aa_junc`` -- ``junc`` translated with ``Seq(...).translate()``
    * ``aa_junction_length`` -- ``len()`` of each ``aa_junc`` string

    Args:
        db_file: Path or file-like object accepted by ``pd.read_csv``.
        dialect: CSV dialect forwarded to ``pd.read_csv``; defaults to
            ``'excel-tab'``.

    Returns:
        The loaded ``DataFrame`` with the derived columns added.
    """
    df = pd.read_csv(db_file, dialect=dialect)
    df = df.rename(columns=str.lower)
    # NOTE(review): a filter keeping only rows with functional == 'F' used to
    # be commented out here; no rows are filtered by this function.

    # Parse v and j genes up front to speed up computation later. Each v_call
    # is parsed once; the string form is derived from the already-built set
    # rather than calling parseAllele a second time.
    v_gene_sets = [set(parseAllele(call, gene_regex, 'set'))
                   for call in df.v_call]
    df['v_gene_set'] = v_gene_sets
    df['v_gene_set_str'] = [str(genes) for genes in v_gene_sets]
    df['j_gene_set'] = [set(parseAllele(call, gene_regex, 'set'))
                        for call in df.j_call]

    # Keep the derived lists in locals so each later column is built from
    # them instead of being read back out of the DataFrame.
    junctions = [junction_re(seq) for seq in df.junction]
    df['junc'] = junctions
    # NOTE(review): generic_dna presumably comes from Bio.Alphabet, which
    # newer Biopython releases no longer ship -- confirm the pinned version.
    aa_junctions = [str(Seq(seq, generic_dna).translate())
                    for seq in junctions]
    df['aa_junc'] = aa_junctions
    df['aa_junction_length'] = [len(aa) for aa in aa_junctions]
    return df
# (Web-page residue, not part of the module: "comment list" / "table of contents".)