def prune_homology_graph(df, chim_dir):
    """Return the names of junctions that lose out to a homologous sibling.

    Each value in ``df['name']`` is split on ``":"``; fields 0-2 are joined
    back into ``brk_left_cut`` and fields 3-5 into ``brk_right_cut``.  For
    every left (then right) cut value shared by more than one row, the rows
    sharing it are compared pairwise with ``get_pairwise_hom``.  Junctions
    that end up linked to at least one other junction are ranked by
    ``TPM_Fusion`` and then by the larger of ``TPM_Left``/``TPM_Right``
    (both descending); all but the top-ranked one are reported for removal.

    Args:
        df: pandas DataFrame with the columns ``name``, ``TPM_Fusion``,
            ``TPM_Left`` and ``TPM_Right``.  NOTE: it is modified in place;
            the ``brk_left_cut`` and ``brk_right_cut`` columns are added.
        chim_dir: passed through unchanged to ``get_pairwise_hom``.

    Returns:
        list of junction names to drop.  A name can appear more than once if
        it is out-ranked in both a left-side and a right-side group.
    """
    to_remove = []

    # Split once and reuse for both breakpoint keys.
    name_parts = df['name'].str.split(":")
    df['brk_left_cut'] = name_parts.str[0:3].str.join(sep=":")
    df['brk_right_cut'] = name_parts.str[3:6].str.join(sep=":")

    # All left-side groups are handled before any right-side group.
    for hom_side in ("left", "right"):
        cut_col = 'brk_' + hom_side + '_cut'
        # unique() so a cut shared by 3+ rows is processed only once (the
        # right side used to be revisited for every extra duplicate row).
        shared_cuts = df.loc[df[cut_col].duplicated(), cut_col].unique()

        for node in shared_cuts:
            node_members = df.loc[df[cut_col] == node, 'name']

            node_graph = nx.Graph()
            node_graph.add_nodes_from(node_members, exprs=10)
            for jxn1, jxn2 in itertools.combinations(node_members, 2):
                pair_score = get_pairwise_hom(jxn1, jxn2, chim_dir, hom_side)
                # A score of exactly 0 is treated as "no link" between the pair.
                if pair_score != 0:
                    node_graph.add_edge(jxn1, jxn2, weight=pair_score)

            # Weighted degree == column sum of the weighted adjacency matrix.
            # This replaces nx.to_pandas_dataframe, which networkx 2.0 removed;
            # dict() accepts both the 1.x dict and the 2.x+ DegreeView.
            weighted_degree = dict(node_graph.degree(weight='weight'))
            node_compare = [
                jxn for jxn, total in weighted_degree.items() if total > 0
            ]
            if not node_compare:
                continue

            # NOTE(review): every linked junction in the group is ranked
            # together, even if they fall in separate connected components --
            # confirm that keeping a single survivor per group is intended.
            tpm_cols = ['name', 'TPM_Fusion', 'TPM_Left', 'TPM_Right']
            node_homdf = df.loc[df['name'].isin(node_compare), tpm_cols]
            node_homdf = node_homdf.set_index('name')
            node_homdf['max_pairs'] = (
                node_homdf[['TPM_Left', 'TPM_Right']].max(axis=1)
            )
            node_homdf = node_homdf.sort_values(
                ['TPM_Fusion', 'max_pairs'], ascending=False
            )
            # Keep the top-ranked junction; everything below it is dropped.
            to_remove.extend(node_homdf.iloc[1:].index.tolist())

    # use list of to_remove to mark homologous fusions
    return to_remove
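# Comment list  (web-page navigation residue, not part of the program)
# Table of contents  (web-page navigation residue, not part of the program)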