bubble_plot.py 文件源码-python代码片段

def plot_with_z(df, x, y, z_boolean, bins_x, bins_y, x_is_numeric, y_is_numeric, ordered_x_values, ordered_y_values, maximal_bubble_size=4000, normalization_by_all=False):
    count_table = pd.concat([pd.cut(df[x], bins=bins_x) if x_is_numeric else df[x],
                         pd.cut(df[y], bins=bins_y) if y_is_numeric else df[y], df[z_boolean]], axis=1)
    count_table = count_table.groupby([x,z_boolean])[y].value_counts().unstack().fillna(0)
    count_table = count_table.unstack()
    count_table_long = pd.melt(count_table.reset_index(), id_vars=x)
    z_boolean_values = count_table_long[z_boolean].unique()
    ratio = pd.DataFrame({'ratio':count_table_long.set_index([x,y,z_boolean]).unstack()['value'][z_boolean_values[1]] / (
    count_table_long.set_index([x,y,z_boolean]).unstack()['value'].sum(axis=1) )})
    count_table_long = count_table_long.set_index([x, y ])[['value']].merge(ratio, left_index=True, right_index=True).reset_index()
    size_factor = maximal_bubble_size/count_table_long['value'].max()
    x_values_dict = {x:i for i, x in enumerate(ordered_x_values)} \
        if not x_is_numeric else {xx:get_point(xx) for xx in ordered_x_values}
    y_values_dict = {x:i for i, x in enumerate(ordered_y_values)} \
        if not y_is_numeric else {xx: get_point(xx) for xx in ordered_y_values}
    xticks = np.arange(len(ordered_x_values)) if not x_is_numeric else [get_point(xx) for xx in ordered_x_values]
    yticks = np.arange(len(ordered_y_values)) if not y_is_numeric else [get_point(xx) for xx in ordered_y_values]
    xticklabels = ordered_x_values if not x_is_numeric else [get_point(xx) for xx in ordered_x_values]
    yticklabels = ordered_y_values if not y_is_numeric else [get_point(xx) for xx in ordered_y_values]
    count_table_long[x] = count_table_long[x].map(x_values_dict)
    count_table_long[y] = count_table_long[y].map(y_values_dict)
    plt.scatter(count_table_long[x], count_table_long[y], s=size_factor*count_table_long['value'],
                c=count_table_long['ratio'],  alpha=0.5,
                cmap='cool')
    return count_table_long, xticks, yticks, xticklabels, yticklabels