def generatefinaltable(resultdic, totaldic, lib, dfile):
    '''
    Merge per-sublibrary count tables into one count table and build a
    per-file read summary.

    Parameters
    ----------
    resultdic : dict
        Maps a sublibrary name to a DataFrame of counts. Each frame is
        merged onto the matching library frame on its 'Sequence' column.
    totaldic : dict
        Its items become the rows of a two-column table
        ('filepath', 'total_reads').
    lib : dict
        Maps a sublibrary name to its library DataFrame. After merging,
        the table must contain the columns 'sgRNA', 'Gene', 'Sequence'
        and 'sublib'.
    dfile : pandas.DataFrame or any other object
        When a DataFrame, it is merged with the totals on 'filepath' and
        with the mapped-read totals on ['sublib', 'sample']. Any other
        value selects the single-file code path.

    Returns
    -------
    tuple
        (count_df, summary_df). count_df starts with the columns
        sgRNA, Gene, Sequence, sublib followed by the remaining columns.
        summary_df has the columns filepath, sample, sublib, total_reads,
        mapped_reads, mapping_ratio.

    Raises
    ------
    ValueError
        If one of the four identifier columns is missing from the merged
        count table (this includes an empty resultdic).
    '''
    id_columns = ('sgRNA', 'Gene', 'Sequence', 'sublib')

    # Generate count table: one left-merge per sublibrary so that every
    # library row is kept; rows without a match get 0 instead of NaN.
    frames = []
    for sublibname, c_df in resultdic.items():
        merged = lib[sublibname].merge(c_df, on='Sequence', how='left')
        frames.append(merged.fillna(0))
    # DataFrame.append was removed in pandas 2.0; concatenate once instead.
    # An empty input keeps the previous behaviour (empty frame, then a
    # ValueError from the column lookup below).
    count_df = pd.concat(frames) if frames else pd.DataFrame()

    # Move the identifier columns to the front, keeping the others in order.
    count_columns = count_df.columns.tolist()
    for position, name in enumerate(id_columns):
        count_columns.insert(position, count_columns.pop(count_columns.index(name)))
    count_df = count_df.loc[:, count_columns]

    # Generate summary table
    # Column 3 is 'sublib' after the reordering above, so the slice holds
    # 'sublib' plus every remaining column, which are summed per sublibrary.
    mapped_total = count_df.iloc[:, 3:].groupby('sublib').sum().reset_index()
    # var_name must be a scalar; recent pandas versions reject a list here.
    mapped_total_df = pd.melt(
        mapped_total,
        id_vars=['sublib'],
        var_name='sample',
        value_name='mapped_reads',
    )
    totalread_df = pd.DataFrame(
        list(totaldic.items()), columns=['filepath', 'total_reads']
    )

    if isinstance(dfile, pd.DataFrame):
        summary_df = dfile.merge(totalread_df, on='filepath')
        summary_df = summary_df.merge(mapped_total_df, on=['sublib', 'sample'])
    else:  # single file
        # NOTE(review): this joins on the positional index, so only rows of
        # mapped_total_df whose index also exists in totalread_df are kept.
        # Confirm that a single file always yields a single sublib/sample row.
        summary_df = totalread_df.join(mapped_total_df)

    summary_df['mapping_ratio'] = summary_df['mapped_reads'] / summary_df['total_reads']
    summary_df = summary_df.loc[
        :,
        ['filepath', 'sample', 'sublib', 'total_reads', 'mapped_reads', 'mapping_ratio'],
    ]
    return (count_df, summary_df)
评论列表
文章目录