homology_graph.py 文件源码-python代码片段

def prune_homology_graph(df, chim_dir):
    """Return names of junctions that are homologous to a better-expressed one.

    Junctions are grouped by a shared breakpoint "cut": the first three
    colon-separated fields of ``name`` for the left side, fields four to six
    for the right side. Within each group that has more than one member, every
    pair of junctions is scored with ``get_pairwise_hom``; a non-zero score
    links the pair. Among the members whose summed link weight is positive,
    the one ranked first by ``TPM_Fusion`` and then by
    ``max(TPM_Left, TPM_Right)`` (both descending) is kept and all others are
    reported for removal.

    Args:
        df: DataFrame with at least the columns ``name``, ``TPM_Fusion``,
            ``TPM_Left`` and ``TPM_Right``. Side effect: the helper columns
            ``brk_left_cut`` and ``brk_right_cut`` are added to (or
            overwritten on) this DataFrame.
        chim_dir: Passed through unchanged to ``get_pairwise_hom``.

    Returns:
        A list of ``name`` values to mark as homologous. A name can occur
        more than once if it loses in more than one breakpoint group.
    """
    to_remove = []

    # Split once and reuse for both sides of the junction name.
    name_parts = df['name'].str.split(":")
    df['brk_left_cut'] = name_parts.str[0:3].str.join(sep=":")
    df['brk_right_cut'] = name_parts.str[3:6].str.join(sep=":")

    # Collect every breakpoint shared by 2+ junctions exactly once per side,
    # in first-occurrence order so the result is deterministic. (Previously
    # the right side was not de-duplicated, so a breakpoint shared by three
    # or more junctions was processed repeatedly.)
    all_nodes = []
    for hom_side in ("left", "right"):
        cut_col = 'brk_' + hom_side + '_cut'
        shared_cuts = df.loc[df[cut_col].duplicated(), cut_col].drop_duplicates()
        all_nodes.extend((node, hom_side) for node in shared_cuts)

    for node, hom_side in all_nodes:
        node_members = df.loc[df['brk_' + hom_side + '_cut'] == node, 'name']

        node_graph = nx.Graph()
        node_graph.add_nodes_from(node_members, exprs=10)
        for jxn1, jxn2 in itertools.combinations(node_members, 2):
            # presumably a homology score between the two junctions on the
            # given side; 0 is treated as "no homology" -- confirm against
            # get_pairwise_hom.
            pair_score = get_pairwise_hom(jxn1, jxn2, chim_dir, hom_side)
            if pair_score != 0:
                node_graph.add_edge(jxn1, jxn2, weight=pair_score)

        # Weighted degree == column sum of the weighted adjacency matrix.
        # dict(...) accepts both the dict returned by networkx 1.x and the
        # DegreeView returned by 2.x+, avoiding the removed
        # nx.to_pandas_dataframe helper.
        weighted_degree = dict(node_graph.degree(weight='weight'))
        node_compare = [jxn for jxn, total in weighted_degree.items() if total > 0]
        if not node_compare:
            continue

        node_homdf = df.loc[
            df['name'].isin(node_compare),
            ['name', 'TPM_Fusion', 'TPM_Left', 'TPM_Right'],
        ].set_index('name')
        node_homdf['max_pairs'] = node_homdf[['TPM_Left', 'TPM_Right']].max(axis=1)
        node_homdf = node_homdf.sort_values(['TPM_Fusion', 'max_pairs'], ascending=False)

        # Keep the top-ranked junction; everything below it is flagged.
        to_remove.extend(node_homdf.iloc[1:].index.tolist())

    # The caller uses this list to mark homologous fusions.
    return to_remove