def boxplot_metrics(df, eval_dir):
    """
    Draw one box plot per geometric measure and save them as one figure.

    Three vertically stacked panels are drawn, one each for the ``dice``,
    ``hd`` and ``assd`` columns. Every panel groups by ``struc`` on the
    x-axis and splits the boxes by ``phase``.

    :param df: data handed to seaborn; it must provide the columns
        ``struc``, ``phase``, ``dice``, ``hd`` and ``assd``
    :param eval_dir: directory in which ``boxplots.eps`` is written
    :return: always 0
    """
    metrics = ('dice', 'hd', 'assd')
    figure, panels = plt.subplots(len(metrics), 1)
    figure.set_figheight(14)
    figure.set_figwidth(7)

    for metric, panel in zip(metrics, panels):
        sns.boxplot(x='struc', y=metric, hue='phase', data=df,
                    palette="PRGn", ax=panel)

    plt.savefig(os.path.join(eval_dir, 'boxplots.eps'))
    plt.close()
    return 0
# Python boxplot(): example source code
def plot(params_dir):
    """
    Box-plot the best validation objective of every model under ``params_dir``.

    Each sub-directory of ``params_dir`` is treated as one model. For every
    ``*.h5`` file inside it, ``['best_epoch']['validate_objective']`` is read
    and collected under the directory name (with the substring
    ``_bin_scaled_mono_True_ratio`` removed). The collected table is written
    as CSV to a file named after the last path component of ``params_dir``
    (relative to the current working directory, no extension), and the box
    plot is saved as ``<IMAGES_DIRECTORY>/<name>_errors_box_plot.png``.

    :param params_dir: directory containing one sub-directory per model
    """
    model_dirs = [name for name in os.listdir(params_dir)
                  if os.path.isdir(os.path.join(params_dir, name))]

    objectives = defaultdict(list)
    for model_dir in model_dirs:
        label = re.sub('_bin_scaled_mono_True_ratio', '', model_dir)
        objectives[label] = [
            dd.io.load(path)['best_epoch']['validate_objective']
            for path in glob.glob(os.path.join(params_dir, model_dir) + '/*.h5')]

    # Wrapping each list in a Series lets models with a different number of
    # runs share one frame. ``.items()`` (rather than the Python-2-only
    # ``.iteritems()``) keeps this working on Python 3 as well.
    df = pd.DataFrame({label: pd.Series(values)
                       for label, values in objectives.items()})

    run_name = os.path.basename(os.path.normpath(params_dir))
    df.to_csv(run_name)

    plt.figure(figsize=(16, 4), dpi=300)
    g = sns.boxplot(df)
    g.set_xticklabels(df.columns, rotation=45)
    plt.tight_layout()
    plt.savefig('{}_errors_box_plot.png'.format(
        os.path.join(IMAGES_DIRECTORY, run_name)))
def plot_group(data_frame, path_output):
    """
    Save a box plot of the ``DAB+ area, %`` column grouped by ``Group``.

    The figure uses seaborn's "whitegrid" style and "talk" context, fixes the
    y-axis to the range 0..100 and is written to
    ``<path_output>/summary_statistics.png`` at 300 dpi.

    :param data_frame: data with the columns ``Group`` and ``DAB+ area, %``
    :param path_output: directory that receives ``summary_statistics.png``
    """
    # optional import
    import seaborn as sns

    sns.set_style("whitegrid")
    sns.set_context("talk")

    plt.figure(num=None, figsize=(15, 7), dpi=120)
    plt.ylim(0, 100)
    plt.title('Box plot')
    sns.boxplot(x="Group", y="DAB+ area, %", data=data_frame)
    plt.tight_layout()

    plt.savefig(os.path.join(path_output, "summary_statistics.png"), dpi=300)
def boxplot(self, fig_width: Number, fig_height: Number = None):
    """
    Draw a horizontal box plot of the data frame returned by ``get_data_frame()``.

    The current plot state is reset first, then a new figure of the requested
    size is created and stored in ``self._fig``.

    :param fig_width: width of the figure in cm
    :param fig_height: height of the figure in cm; when None it is derived
        from ``fig_width`` via ``_height_for_width``
    """
    import seaborn as sns
    import matplotlib.pyplot as plt

    self.reset_plt()
    height = self._height_for_width(fig_width) if fig_height is None else fig_height
    size_in_inches = self._fig_size_cm_to_inch(fig_width, height)
    self._fig = plt.figure(figsize=size_in_inches)
    sns.boxplot(data=self.get_data_frame(), orient="h")
def plot_averages(df, figname, fignum, use_en_source=True, num_accs=3):
    """
    Box-plot accuracy per layer for one translation direction and save it.

    Rows are selected by direction: English source with a non-English target
    when ``use_en_source`` is true, otherwise a non-English source with an
    English target. Accuracies come from ``get_accs_from_df`` (column prefix
    ``acc``) and are flattened; the layer labels are generated to line up
    with them, in a different order for each direction.

    :param df: frame with at least the ``source`` and ``target`` columns
    :param figname: path the figure is saved to
    :param fignum: matplotlib figure number to draw on
    :param use_en_source: choose the English-source direction
    :param num_accs: how many times the block of layer labels is repeated
    :return: ``fignum + 1``
    """
    plt.figure(fignum)

    if use_en_source:
        selected = (df.source == 'en') & (df.target != 'en')
        # 0,0,0,0,0,1,1,1,1,1,... repeated num_accs times
        layers = np.concatenate([[i]*5 for i in range(5)] * num_accs)
    else:
        selected = (df.source != 'en') & (df.target == 'en')
        # 0,1,2,3,4,0,1,2,3,4,...
        layers = list(range(5))*5*num_accs

    accs = get_accs_from_df(df[selected], col_pref='acc')
    df_plot = pd.DataFrame({'Layer' : layers,
                            'Accuracy' : np.concatenate(accs)})

    sns.boxplot(x='Layer', y='Accuracy', data=df_plot)
    plt.tight_layout()
    plt.savefig(figname)
    return fignum + 1
def plot_averages_by_type(df, figname, fignum, use_en_source=True, pointplot=True, layer0=True):
    """
    Plot accuracy per relation, coloured by layer, for one translation direction.

    :param df: frame with the columns ``source``, ``target``, ``layer``,
        ``relation`` and ``accuracy``
    :param figname: path the figure is saved to
    :param fignum: matplotlib figure number to draw on
    :param use_en_source: keep English-source/non-English-target rows when
        true, otherwise non-English-source/English-target rows
    :param pointplot: draw a point plot (points not joined) instead of a box plot
    :param layer0: when false, rows whose ``layer`` equals the string ``'0'``
        are dropped
    :return: ``fignum + 1``
    """
    plt.figure(fignum)

    if use_en_source:
        direction = (df.source == 'en') & (df.target != 'en')
    else:
        direction = (df.source != 'en') & (df.target == 'en')
    df_side = df[direction]

    if not layer0:
        df_side = df_side[df_side.layer != '0']

    if pointplot:
        sns.pointplot(x='accuracy', y='relation', hue='layer', data=df_side, join=False)
    else:
        sns.boxplot(x='accuracy', y='relation', hue='layer', data=df_side)

    plt.xlabel('Accuracy')
    plt.ylabel('')
    plt.tight_layout()
    plt.savefig(figname)
    return fignum + 1
def cross_section_cndl(data, factor_name):
    '''Draw one box per date showing the cross-sectional spread of a factor.

    The index of ``data`` is moved into regular columns so that ``Date`` can
    be used as the x-axis category.

    Parameters
    ------------------------------
    data:DataFrame(index:[Date,IDs],factor1,factor2,...)
    factor_name:str
        name of the factor column plotted on the y-axis

    Returns
    ------------------------------
    the axes object returned by seaborn's boxplot
    '''
    flat = data.reset_index()
    sns.set(style='ticks')
    axes = sns.boxplot(x='Date', y=factor_name, data=flat, palette='PRGn')
    sns.despine(offset=10, trim=True)
    return axes
# ??2
# ?????, ?????????????
def plot_author_contributions(commit_frame):
    """
    Show a vertical box plot of committed line counts per author.

    :param commit_frame: data with the columns ``author`` and
        ``stats_total_lines``
    """
    sns.boxplot(data=commit_frame, x='author', y='stats_total_lines', orient='v')

    plt.title('Code Contributions by Authors')
    plt.xlabel('Author')
    plt.ylabel('Total Lines Committed')
    # Tilt the author names on the x-axis by 70 degrees.
    plt.xticks(rotation=70)
    plt.show()
def plot_fnc(self, *args, **kwargs):
    """Draw a seaborn box plot, forwarding all arguments unchanged.

    Nothing is returned; the axes object produced by seaborn is discarded.
    """
    sns.boxplot(*args, **kwargs)
def whiskers(self, whis: float = 1.5) -> t.Tuple[float, float]:
    """
    Calculates the lower and the upper whisker for a boxplot.

    I.e. the minimum and the maximum value of the data set that lie in the
    range [Q1 - whis * IQR, Q3 + whis * IQR], IQR being the interquartile
    distance, Q1 the lower and Q3 the upper quartile. If no data point lies
    between a quartile and its fence, that quartile itself is used as the
    whisker.

    Adapted from http://stackoverflow.com/a/20096945

    :param whis: multiple of the IQR that defines the fences
    :return: (lower whisker, upper whisker)
    """
    q1, _, q3 = self.quartiles()
    iqr = self.iqr()

    # get high extreme: the fence is measured from the UPPER quartile
    hi_val = q3 + whis * iqr
    whisk_hi = np.compress(self.array <= hi_val, self.array)
    if len(whisk_hi) == 0 or np.max(whisk_hi) < q3:
        whisk_hi = q3
    else:
        whisk_hi = max(whisk_hi)

    # get low extreme
    lo_val = q1 - whis * iqr
    whisk_lo = np.compress(self.array >= lo_val, self.array)
    if len(whisk_lo) == 0 or np.min(whisk_lo) > q1:
        whisk_lo = q1
    else:
        whisk_lo = min(whisk_lo)

    return whisk_lo, whisk_hi
def plot_averages_by_distance(df, figname, fignum, use_en_source=True, num_accs=24, pointplot=True, hue='Distance'):
    """
    Plot accuracy by layer and distance bucket for one translation direction.

    Accuracies come from ``get_accs_from_df`` (column prefix ``dist``) and are
    flattened. Layer labels are generated to line up with them, and each of
    the first 8 entries of ``pretty_dist_names_list`` is repeated 75 times as
    the distance label.

    :param df: frame with at least the ``source`` and ``target`` columns
    :param figname: path the figure is saved to
    :param fignum: matplotlib figure number to draw on
    :param use_en_source: keep English-source/non-English-target rows when
        true, otherwise non-English-source/English-target rows
    :param num_accs: how many times the block of layer labels is repeated
    :param pointplot: draw a point plot instead of a box plot
    :param hue: ``'Distance'`` puts layers on the x-axis and colours by
        distance; any other value swaps the two
    :return: ``fignum + 1``
    """
    plt.figure(fignum)

    if use_en_source:
        selected = (df.source == 'en') & (df.target != 'en')
        layers = np.concatenate([[i]*5 for i in range(5)] * num_accs)
    else:
        selected = (df.source != 'en') & (df.target == 'en')
        layers = list(range(5))*5*num_accs

    accs = get_accs_from_df(df[selected], col_pref='dist')
    distance_labels = np.concatenate(
        [[pretty_dist_names_list[i]]*75 for i in range(8)])
    df_plot = pd.DataFrame({'Layer' : layers,
                            'Accuracy' : np.concatenate(accs),
                            'Distance' : distance_labels })

    draw = sns.pointplot if pointplot else sns.boxplot
    if hue == 'Distance':
        draw(x='Layer', y='Accuracy', data=df_plot, hue='Distance')
    else:
        draw(x='Distance', y='Accuracy', data=df_plot, hue='Layer')
        # NOTE(review): the source's indentation was lost; relabelling the 8
        # x ticks is assumed to belong to this branch only, where the x-axis
        # holds the distance buckets. Confirm against the original file.
        plt.xticks(range(8), pretty_dist_names_list)

    plt.tight_layout()
    plt.savefig(figname)
    return fignum + 1
def explore_feature_variation(self, col=None, use_target=False, **kwargs):
    '''
    Produces univariate plots of a given set of columns. Horizontal bar plots
    are used for categorical columns while histograms (with fitted density
    functions) are used for numerical columns. For categorical columns with
    more than 30 distinct values only the 20 most frequent ones are shown.

    If use_target is true, each column is instead passed together with the
    response variable (``self.y``) to ``explore_features_covariation``.

    Parameters
    ----------
    col : a string of a column name, or a list of many columns names or
        None (default). If col is None, all columns will be used.

    use_target : bool, default False
        Whether to use the target column in the plots.

    **kwargs: additional arguments to be passed to seaborn's distplot or
        to pandas's plotting utilities.

    Raises
    ------
    TypeError
        If a column is neither numeric nor categorical.
    '''
    self._validate_params(params_list={'col': col},
                          expected_types={'col': [str, list, type(None)]})

    # Normalise ``col`` to a list of column names. isinstance (rather than an
    # exact type check) also wraps str subclasses instead of iterating over
    # their characters.
    if isinstance(col, str):
        col = [col]
    elif col is None:
        col = self._get_all_features()

    if use_target:
        for column in col:
            self.explore_features_covariation(col1=column, col2=self.y, **kwargs)
        return

    for column in col:
        series = self.df[column]
        if self.is_numeric(series):
            plt.figure(column)
            sns.distplot(series, color="m", **kwargs)
        elif self.is_categorical(series):
            plt.figure(column)
            counts = series.value_counts()
            if len(series.unique()) > 30:
                # Too many categories to read: keep the 20 most frequent.
                counts = counts[:20]
            # Reverse so the most frequent category ends up at the top.
            counts[::-1].plot.barh(**kwargs)
        else:
            raise TypeError('TYPE IS NOT SUPPORTED')
        plt.title(column)
        plt.tight_layout()
def calculate_word_count_stats(articles: pd.DataFrame):
    """Calculate aggregate word count statistics on each source's articles.

    Prints count, mean and standard deviation of ``word_count`` per
    ``base_url`` (most articles first), then shows two plots for the ten
    sources with the most articles: a bar plot of their article counts and a
    box plot of their word counts.

    :param articles: frame with the columns ``base_url`` and ``word_count``
    """
    # ``seaborn.plt`` is not available in current seaborn releases, so
    # pyplot is imported directly.
    import matplotlib.pyplot as plt

    by_source = articles.groupby(['base_url'])['word_count']
    by_source = by_source.agg(['count', 'mean', 'std'])
    by_source.sort_values('count', ascending=False, inplace=True)
    print_full(by_source)

    top_sources = by_source.head(10).index
    top_counts = by_source.reset_index()[by_source.index.isin(top_sources)]

    sns.barplot(x='base_url', y='count', data=top_counts)
    plt.show()

    sns.boxplot(x='base_url', y='word_count',
                data=articles[articles['base_url'].isin(top_sources)])
    plt.show()
def show_articles_by_source(articles: pd.DataFrame):
    """Show boxplot comparing articles by source for fake and true news.

    Articles are counted per (``base_url``, ``labels``) pair; only pairs with
    more than 100 articles are kept, and the counts are box-plotted per label.

    :param articles: frame with the columns ``base_url`` and ``labels``
    """
    # ``seaborn.plt`` is not available in current seaborn releases, so
    # pyplot is imported directly.
    import matplotlib.pyplot as plt

    by_source = (articles.groupby(['base_url', 'labels'])
                 .size()
                 .reset_index(name='count'))
    by_source = by_source[by_source['count'] > 100]

    sns.boxplot(x='labels', y='count', data=by_source)
    plt.show()
def plot_box(self, fname_out=None):
    """
    Box-plot ``r2`` per ``Method`` from ``self.df_best_expand``.

    The figure is saved to ``fname_out`` when given. Otherwise, if
    ``self.fname`` is set, the target name is built by replacing the last
    four characters of ``self.fname`` with ``_box.eps`` and is announced on
    stdout. With neither available nothing is saved.

    :param fname_out: optional path for the saved figure
    """
    sns.boxplot(x="Method", y="r2", data=self.df_best_expand, palette="PRGn")
    sns.despine(offset=10, trim=True)
    plt.ylabel(r"$r^2$")
    plt.xlabel("Methods")

    if fname_out is None:
        if self.fname is None:
            return
        fname_out = self.fname[:-4] + '_box.eps'
        print('Default: the figure of self.df_best_expand is saved to', fname_out)

    plt.savefig(fname_out)  # index should be stored.
def plot_box(self, fname_out=None):
    """
    Draw the ``r2``-by-``Method`` box plot of ``self.df_best_expand``.

    Saving rules: an explicit ``fname_out`` wins; failing that, a default
    name is derived from ``self.fname`` (its last four characters replaced by
    ``_box.eps``) and printed; if ``self.fname`` is None too, the figure is
    not saved.

    :param fname_out: optional path for the saved figure
    """
    sns.boxplot(x="Method", y="r2", data=self.df_best_expand, palette="PRGn")
    sns.despine(offset=10, trim=True)
    plt.ylabel(r"$r^2$")
    plt.xlabel("Methods")

    target = fname_out
    if target is None and self.fname is not None:
        target = self.fname[:-4] + '_box.eps'
        print('Default: the figure of self.df_best_expand is saved to', target)

    if target is not None:
        plt.savefig(target)  # index should be stored.
def boxplot_expension(pdr, method_l, x="Group", y="RP", hue="Method"):
    """
    Expand ``pdr`` with ``expension_4_boxplot`` and box-plot the result.

    :param pdr: input data forwarded to ``expension_4_boxplot``
    :param method_l: list of methods forwarded to ``expension_4_boxplot``
        (e.g. ['No_Regression', 'Mean_Compensation', 'Linear', 'Exp'])
    :param x: forwarded to ``expension_4_boxplot``
    :param y: forwarded to ``expension_4_boxplot`` and used as the y column
    :param hue: forwarded to ``expension_4_boxplot``
    """
    expanded = expension_4_boxplot(pdr, method_l, x=x, y=y, hue=hue)
    # NOTE(review): the plot always uses the "Group" and "Method" columns,
    # whatever ``x`` and ``hue`` are -- presumably the expanded frame always
    # carries those names; verify against expension_4_boxplot.
    sns.boxplot(x="Group", y=y, hue="Method", data=expanded, palette="PRGn")
    sns.despine(offset=10, trim=True)
def plot_multi(names, models, angles, runs=1000):
    """
    Compare several models on random test samples and box-plot the results.

    Up to ``runs`` indices are drawn at random (without repetition) from
    ``X_test``. ``compare`` is called for each of them and the ``matched``
    part of its result is collected. The box plot shows one box per model,
    ordered by ascending mean of the collected values.

    :param names: display name for each model, in the same order as the
        columns of the ``matched`` values
    :param models: models forwarded to ``compare``
    :param angles: angles forwarded to ``compare``
    :param runs: maximum number of test samples to evaluate
    """
    from collections import OrderedDict

    indices = np.random.permutation(len(X_test))[:runs]

    matched_all = []
    for i, idx in enumerate(indices):
        print("Processing {}/{}".format(i, len(indices)))
        _, matched = compare(idx, angles, models)
        matched_all.append(matched)
    matched_all = np.array(matched_all)

    order = np.argsort(np.mean(matched_all, axis=0))
    # DataFrame.from_items no longer exists in current pandas; an OrderedDict
    # keeps the columns in the same sorted order.
    df = pd.DataFrame(OrderedDict(
        (names[i], matched_all[:, i]) for i in order))

    sb.boxplot(data=df)
    plt.show()
def stripboxplot(x, y, data, ax=None, significant=None, **kwargs):
    """
    Overlay a stripplot on top of a boxplot.

    The box plot is drawn without outlier markers (``fliersize=0``) because
    the strip plot already shows every observation.

    :param x: column of ``data`` used for the x-axis
    :param y: column of ``data`` used for the y-axis
    :param data: frame holding the ``x`` and ``y`` columns
    :param ax: axes to draw on; forwarded to seaborn
    :param significant: when not None, forwarded to
        ``add_significance_indicator``
    :param kwargs: extra seaborn arguments. ``jitter`` (default 0.05) applies
        to the strip plot only. ``color`` is passed to both plots when given,
        and defaults to "0.3" for the strip plot otherwise. Everything else
        goes to both plots.
    :return: the object returned by ``sb.stripplot``
    """
    # ``jitter`` is a strip-plot-only option, so it must be removed before
    # the remaining kwargs are forwarded to the box plot.
    jitter = kwargs.pop("jitter", 0.05)

    ax = sb.boxplot(
        x=x,
        y=y,
        data=data,
        ax=ax,
        fliersize=0,
        **kwargs
    )

    strip_color = kwargs.pop("color", "0.3")
    plot = sb.stripplot(
        x=x,
        y=y,
        data=data,
        ax=ax,
        jitter=jitter,
        color=strip_color,
        **kwargs
    )

    if data[y].min() >= 0:
        hide_negative_y_ticks(plot)
    if significant is not None:
        add_significance_indicator(plot=plot, significant=significant)
    return plot
def visualize_feature_boxplot(X,y,selected_feature,features):
    """
    Visualize the boxplot of a feature, split by the target column 'Y'

    The feature matrix and the target are joined column-wise into one frame
    whose column names are taken from ``features``. The figure is handed to
    ``save_fig`` with the name ``img/<selected_feature>_boxplot.png``.

    Keyword arguments:
    X -- The feature vectors
    y -- The target vector
    selected_feature -- The desired feature to obtain the boxplot
    features -- Vector of column names for the joined data; the plot refers
                to a column called 'Y'
    """
    frame = pd.DataFrame(data=np.column_stack((X, y)), columns=features)

    sea.boxplot(data=frame, x='Y', y=selected_feature, hue="Y", palette="husl")
    plt.title('BoxPlot Distribution of '+selected_feature)

    # save fig
    output_dir = "img"
    target = '{}/{}_boxplot.png'.format(output_dir, selected_feature)
    save_fig(output_dir, target)
def _rotate_heatmap_ticklabels(grid):
    """Make the heatmap's row labels horizontal and its column labels vertical."""
    for item in grid.ax_heatmap.get_yticklabels():
        item.set_rotation(0)
    for item in grid.ax_heatmap.get_xticklabels():
        item.set_rotation(90)


def inspect_bulk(df, df_bulk, de_genes, de_genes_bulk):
    """
    Write quality-control plots for the bulk expression matrix.

    For the "bitseq" quantification (``df_bulk``) this saves, under
    ``results/bulk``:

    * a box plot of the melted expression values per ``grna``, split by
      ``condition``;
    * for each gene set (``de_genes`` and ``de_genes_bulk``) a row-z-scored
      clustered heatmap of the matrix restricted to those genes, and a
      clustered heatmap of the correlation between its columns.

    :param df: not used by this function
    :param df_bulk: bulk expression matrix
    :param de_genes: gene labels, saved under the name "de_genes"
    :param de_genes_bulk: gene labels, saved under the name "de_genes_bulk"
    """
    quant_types = [("bitseq", df_bulk)]
    # Pair each output label with its gene list explicitly instead of
    # resolving the variable by name with eval().
    genesets = [("de_genes", de_genes), ("de_genes_bulk", de_genes_bulk)]

    for quant_type, exp_matrix in quant_types:
        print(quant_type)

        # Boxplots of expression
        fig, axis = plt.subplots(1)
        sns.boxplot(data=pd.melt(exp_matrix), x="grna", y="value", hue="condition", ax=axis)
        fig.savefig(os.path.join("results", "bulk", "bulk_samples.qc.{}.expression_boxplots.png".format(quant_type)), dpi=300, bbox_inches="tight")

        # Heatmap and correlation on signature genes
        for geneset, genes in genesets:
            # NOTE(review): ``.ix`` is unavailable in current pandas; kept
            # as-is because the right replacement depends on whether every
            # gene is guaranteed to be present in the index.
            signature = exp_matrix.ix[genes].dropna()

            g = sns.clustermap(
                signature,
                z_score=0,
                row_cluster=True, col_cluster=True,
                xticklabels=True, yticklabels=True,
                figsize=(15, 15))
            _rotate_heatmap_ticklabels(g)
            g.fig.savefig(os.path.join("results", "bulk", "bulk_samples.qc.{}.{}.png".format(quant_type, geneset)), dpi=300, bbox_inches="tight")

            g = sns.clustermap(
                signature.corr(),
                row_cluster=True, col_cluster=True,
                xticklabels=True, yticklabels=True,
                figsize=(15, 15))
            _rotate_heatmap_ticklabels(g)
            g.fig.savefig(os.path.join("results", "bulk", "bulk_samples.qc.{}.{}.correlation.png".format(quant_type, geneset)), dpi=300, bbox_inches="tight")
def plot_box(df, x, y, hue, tag='eda', directory=None):
    r"""Draw a box plot with seaborn and hand the figure to ``write_plot``.

    Parameters
    ----------
    df : pandas.DataFrame
        The dataframe containing the ``x`` and ``y`` features.
    x : str
        Variable name in ``df`` to display along the x-axis.
    y : str
        Variable name in ``df`` to display along the y-axis.
    hue : str
        Variable name to be used as hue, i.e., another data dimension.
    tag : str
        Unique identifier for the plot.
    directory : str, optional
        The full specification of the plot location.

    Returns
    -------
    None : None.

    References
    ----------
    http://seaborn.pydata.org/generated/seaborn.boxplot.html

    """
    logger.info("Generating Box Plot")

    # Draw the plot and tidy up the spines
    axes = sns.boxplot(data=df, x=x, y=y, hue=hue)
    sns.despine(offset=10, trim=True)

    # Persist the figure that owns the axes
    write_plot('seaborn', axes.get_figure(), 'box_plot', tag, directory)
#
# Function plot_swarm
#
def plot_alphadf(alphasdf, col_order, labeldict, metric='alpha'):
    """
    Plot faceted alpha diversity.

    One facet is drawn per study (6 facets per row), each showing a box plot
    of ``alpha`` per ``DiseaseState`` with the individual points overlaid.
    Facet titles are replaced by the labels in ``labeldict``.

    Parameters
    ----------
    alphasdf : pandas DataFrame
        columns ['study', 'alpha', 'DiseaseState']
    col_order : list
        dataset IDs in the order they should be plotted
    labeldict : dict
        dictionary with {dataset: label}
    metric : str
        alpha diversity metric, to use in labeling y axis (the plotted
        column is always 'alpha')

    Returns
    -------
    fig : Figure
    """
    sns.set_style('white')
    g = sns.FacetGrid(alphasdf, col='study', col_wrap=6,
                      col_order=col_order, sharex=False, sharey=False)
    g = g.map(sns.boxplot, "DiseaseState", "alpha")
    g = g.map(sns.stripplot, "DiseaseState", "alpha", split=True, jitter=True,
              size=5, linewidth=0.6)
    fig = plt.gcf()
    fig.set_size_inches(14.2, 9)
    # Fix y-axis gridlines
    axs = g.axes
    for i in range(len(axs)):
        ax = axs[i]
        yticks = ax.get_yticks()
        # If bottom limit is between 0 and 1 (i.e. not simpson)
        if not (yticks[0] < 1 and yticks[0] > 0):
            # Snap the limits to whole numbers
            ax.set_ylim(floor(yticks[0]), floor(yticks[-1]))
        if yticks[0] < 0:
            # Never show a negative lower limit
            ax.set_ylim(0, floor(yticks[-1]))
        # Re-read the ticks and keep only every second one
        yticks = ax.get_yticks()
        if (yticks[0] < 1 and yticks[0] > 0):
            ax.set_yticks(yticks[1::2])
        else:
            ax.set_yticks(yticks[::2])
            # Need some space on the y-axis for p-values
            # NOTE(review): the source's indentation was lost; this line is
            # assumed to belong to the else branch -- confirm against the
            # original file.
            ax.set_ylim(ax.get_ylim()[0], 1.2*ax.get_ylim()[1])
        # Update title
        # The text after '=' in the current title is used as the labeldict key
        oldtitle = ax.get_title()
        newtitle = labeldict[oldtitle.split('=')[1].strip()]
        ax.set_title(newtitle)
        # Update y label
        # Only the first facet of each row of 6 gets a y label
        if i % 6 == 0:
            ax.set_ylabel(metric)
    plt.tight_layout()
    return fig
# Source file: figure.ubiquity_abundance_boxplots.py
# Project: microbiomeHD
# Author: cduvallet
# (page metadata: project source, file source, 24 views, 0 favorites, 0 likes, 0 comments)
def plot_ubiq_abun_boxplot(tidy, metric, calculation):
    """
    Plot boxplot where x-axis is 'overall_significance' of genus, and values
    are either ubiquity or abundance in tidy (with the respective metric and
    calculation type). Only rows whose patient is "total" are plotted, and
    abundance values are shown as log10.

    Parameters
    ----------
    tidy : pandas dataframe
        has columns overall_significance, value, patient, metric, and calculation
    metric : str
        'abundance' or 'ubiquity'
    calculation: str
        'from_pooled_mean' or 'mean_of_datasets'

    Returns
    -------
    fig, ax : the Figure and Axis objects that were drawn on
    """
    fig, ax = plt.subplots(figsize=(5.5,4))

    tmp = (tidy.query('metric == @metric')
               .query('calculation == @calculation')
               .query('patient == "total"'))

    # Plot log10(abundance)
    if metric == 'abundance':
        tmp.loc[tmp.index, 'value'] = tmp['value'].apply(np.log10)

    categories = ['health', 'disease', 'mixed', 'not_sig']

    # White boxes with black outlines, medians, whiskers and caps
    black_line = {'color': 'k'}
    box_style = {'boxprops': {'edgecolor': 'k', 'facecolor': 'w'},
                 'medianprops': black_line,
                 'whiskerprops': black_line,
                 'capprops': black_line}

    sns.boxplot(data=tmp, x='overall_significance', y='value',
                fliersize=0, ax=ax, color='w',
                order=categories,
                **box_style)
    sns.stripplot(data=tmp, x='overall_significance', y='value',
                  jitter=True, linewidth=0.6, split=True, ax=ax,
                  order=categories,
                  color='w')
    return fig, ax
def visualize_boxplots(X,y):
    """
    Visualize the boxplots of the features

    The columns of X are shown in six consecutive figures, one per group of
    related features, each as a horizontal box plot.

    Keyword arguments:
    X -- The feature vectors (2-D array; columns 0..22 are plotted)
    y -- The target vector (not used by this function)
    """
    # (columns of X, column names, figure title) for each figure
    groups = [
        (slice(0, 1), ["Credit"],
         'BoxPlot Distribution of Credit'),
        (slice(1, 4), ["Gender", "Education", "Marital Status"],
         'BoxPlot Distribution of Features: Gender, Education and Marital Status'),
        (slice(4, 5), ["Age"],
         'BoxPlot Distribution of Age'),
        (slice(5, 11), ["X6", "X7", "X8", "X9", "X10", "X11"],
         'BoxPlot Distribution of Features: History of Payment'),
        (slice(11, 17), ["X12", "X13", "X14", "X15", "X16", "X17"],
         'BoxPlot Distribution of Features: Amount of Bill Statements'),
        # Columns 17..22: previously this figure re-plotted columns 11..16
        # under the X18..X23 names.
        (slice(17, 23), ["X18", "X19", "X20", "X21", "X22", "X23"],
         'BoxPlot Distribution of Features: Amount of Previous Payments'),
    ]

    for column_range, names, title in groups:
        df = pd.DataFrame(data=X[:, column_range], columns=names)
        sea.boxplot(data=df, orient="h", palette="husl")
        plt.title(title)
        plt.show()