def plot_similardishes(idx, xlim):
    """Strip-plot the 20 recipes most similar to recipe ``idx``.

    Reads the module-level ``yum_ingr2`` DataFrame (columns ``recipeName``
    and ``cuisine`` are used) and the module-level ``yum_cos`` similarity
    matrix. Each selected recipe is drawn at its similarity value, one row
    per rank, coloured by cuisine and annotated with its name.

    Parameters
    ----------
    idx : int
        Row of ``yum_cos`` holding the similarities of the query recipe.
        Also used as the row label of that recipe in ``yum_ingr2`` when
        building the title.
    xlim : sequence
        Passed unchanged to ``ax.set_xlim``.
    """
    # argsort is ascending: drop the very last entry (presumably the recipe
    # itself -- TODO confirm), keep the 20 before it, most similar first.
    order = yum_cos[idx].argsort()[-21:-1][::-1]

    # Copy so the two new columns are not written into a view of yum_ingr2.
    match = yum_ingr2.iloc[order].copy()
    # Index the similarities with the same positions used for the selection,
    # so the values stay aligned even if yum_ingr2 has a non-default index.
    match['cosine'] = yum_cos[idx][order]
    match['rank'] = range(1, 1 + len(match))

    plt.figure(figsize=(10, 10))
    ax = sns.stripplot(y='rank', x='cosine', data=match, jitter=0.05,
                       hue='cuisine', size=15, orient="h")
    ax.set_title(yum_ingr2.loc[idx, 'recipeName'] + '('
                 + yum_ingr2.loc[idx, 'cuisine'] + ')', fontsize=18)
    ax.set_xlabel('Flavor cosine similarity', fontsize=18)
    ax.set_ylabel('Rank', fontsize=18)
    ax.yaxis.grid(color='white')
    ax.xaxis.grid(color='white')

    # Ranks start at 1 but the categorical axis positions start at 0,
    # hence the ``y - 1`` when placing each recipe name.
    for name, y, x in zip(match['recipeName'], match['rank'], match['cosine']):
        ax.text(x + 0.001, y - 1, name, ha='left')

    ax.legend(loc='lower right', prop={'size': 14})
    ax.set_ylim([20, -1])
    ax.set_xlim(xlim)
python类stripplot()的实例源码
def plot_null(fnull, fcore, ax=None):
    """
    Draw the null shared-response counts as a strip plot and mark the
    observed number of shared responders on top of it.

    Both ``fnull`` and ``fcore`` are read as tab-separated files; ``fcore``
    uses its first column as the index. Observed counts are the sizes of the
    ``overall`` groups -1, 0 and 1, drawn as black diamonds at x = 0, 1, 2.
    A new figure is created when ``ax`` is None. Returns the axis.
    """
    null_df = pd.read_csv(fnull, sep='\t')
    core_df = pd.read_csv(fcore, sep='\t', index_col=0)

    counts = core_df.groupby('overall').size()
    codes = (-1, 0, 1)
    # A category that never occurs in the core table counts as zero.
    for code in codes:
        if code not in counts:
            counts.loc[code] = 0

    if ax is None:
        _, ax = plt.subplots()

    sns.stripplot(data=null_df, x='type', y='n',
                  order=['health', 'mixed', 'disease'],
                  jitter=True, ax=ax, alpha=0.2)

    observed = [counts.loc[code] for code in codes]
    ax.scatter([0, 1, 2], observed, c='k', marker='D', s=25, zorder=10)
    return ax
def plot_commits_by_engineer(self):
    """Open a new 16x14 figure and strip-plot commit dates per engineer.

    Reads the ``CommitDate`` and ``Engineer`` columns of ``self.tickets``.
    """
    plt.figure(figsize=(16, 14))
    sns.stripplot(
        data=self.tickets,
        x="CommitDate",
        y="Engineer",
        jitter=True,
    )
def response_surface_analysis(df):
    """Perform response surface analysis on df.

    For every (classifier, feature) pair an "effect" is computed from the
    ``test_accuracy`` of runs with the feature switched on (``== 1``) versus
    off (``== 0``). The resulting table is printed and shown as a strip plot
    with one row per feature and one hue per classifier.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs the columns ``classifier`` and ``test_accuracy``; every other
        column is treated as a feature flag compared against 1 and 0.
    """
    # seaborn no longer exposes pyplot as ``sns.plt``; import it directly.
    import matplotlib.pyplot as plt

    def tally_results(df):
        """Yield (classifier, feature, effect) for every combination."""
        features = [col for col in df.columns
                    if col not in ('classifier', 'test_accuracy')]
        classifiers = ['Decision Tree', 'Linear SVC', 'Logistic Regression',
                       'Multinomial NB', 'Random Forest', 'Voting Classifier']
        for classifier in classifiers:
            from_class = df['classifier'] == classifier
            class_mean = df[from_class]['test_accuracy'].mean()
            for feature in features:
                with_feature = df[(df[feature] == 1) & from_class]
                wo_feature = df[(df[feature] == 0) & from_class]
                acc_diff = (with_feature['test_accuracy'].sum()
                            - wo_feature['test_accuracy'].sum())
                # NOTE(review): 255 is hard-coded; presumably the number of
                # runs per classifier -- confirm against the experiment setup.
                mean_diff = acc_diff / 255 + class_mean
                yield classifier, feature, mean_diff

    results = pd.DataFrame(list(tally_results(df)),
                           columns=['classifier', 'feature', 'effect'])

    # Order classifiers by mean accuracy and features by mean effect (both
    # descending) so that the sort and the plot follow that ranking.
    class_order = list(df.groupby('classifier')['test_accuracy']
                         .mean()
                         .sort_values(ascending=False)
                         .index)
    results['classifier'] = pd.Categorical(results['classifier'],
                                           categories=class_order)
    feat_order = list(results.groupby('feature')['effect']
                             .mean()
                             .sort_values(ascending=False)
                             .index)
    results['feature'] = pd.Categorical(results['feature'],
                                        categories=feat_order)
    results.sort_values(['feature', 'classifier'], inplace=True)
    print(results)

    # x/y must be passed by keyword in current seaborn releases.
    sns.stripplot(x='effect', y='feature', hue='classifier', data=results)
    plt.legend()
    plt.show()
def aga_attachedness(
        adata,
        attachedness_type='scaled',
        color_map=None,
        show=None,
        save=None):
    """Attachedness of aga groups.

    Draws the selected attachedness matrix with ``matrix`` and overlays a
    green marker for every non-zero entry of ``adata.uns['aga_adjacency']``,
    then hands the figure to ``utils.savefig_or_show``.

    Parameters
    ----------
    adata
        Object whose ``uns`` mapping holds the aga results.
    attachedness_type : {'scaled', 'distance', 'absolute'}
        Selects ``uns['aga_attachedness']``, ``uns['aga_distances']`` or
        ``uns['aga_attachedness_absolute']`` respectively.
    color_map
        Forwarded to ``matrix``.
    show, save
        Forwarded to ``utils.savefig_or_show``.

    Raises
    ------
    ValueError
        If ``attachedness_type`` is not one of the three supported values.
    """
    uns_keys = {
        'scaled': 'aga_attachedness',
        'distance': 'aga_distances',
        'absolute': 'aga_attachedness_absolute',
    }
    if attachedness_type not in uns_keys:
        raise ValueError(
            'Unknown attachedness_type {}.'.format(attachedness_type))
    attachedness = adata.uns[uns_keys[attachedness_type]]
    adjacency = adata.uns['aga_adjacency']

    matrix(attachedness, color_map=color_map, show=False)
    for i in range(adjacency.shape[0]):
        # Second element of nonzero() = column indices of the non-zero
        # entries; assumes adjacency[i] is 2-D (e.g. a sparse-matrix row)
        # -- TODO confirm.
        neighbors = adjacency[i].nonzero()[1]
        pl.scatter([i] * len(neighbors), neighbors, color='green')
    utils.savefig_or_show('aga_attachedness', show=show, save=save)
def stripboxplot(x, y, data, ax=None, significant=None, **kwargs):
    """
    Overlay a stripplot on top of a boxplot.

    Parameters
    ----------
    x, y : str
        Column names in ``data`` for the two axes.
    data : pandas.DataFrame
        Data passed to both plots; ``data[y]`` is also inspected to decide
        whether negative y ticks are hidden.
    ax : matplotlib axis, optional
        Axis to draw on; passed to ``sb.boxplot``, whose return value is
        then used for the strip plot.
    significant : optional
        When not None, forwarded to ``add_significance_indicator``.
    **kwargs
        Extra options for both plots. ``jitter`` (default 0.05) is used for
        the strip plot only. ``color`` goes to both plots; when absent the
        strip plot falls back to "0.3".

    Returns
    -------
    The object returned by ``sb.stripplot``.
    """
    # ``jitter`` is a strip-plot-only option: take it out before the boxplot
    # call, otherwise it is forwarded to the boxplot, which rejects it.
    jitter = kwargs.pop("jitter", 0.05)

    ax = sb.boxplot(
        x=x,
        y=y,
        data=data,
        ax=ax,
        fliersize=0,
        **kwargs
    )

    # The boxplot has already received ``color`` through kwargs; pop it now
    # so it is not passed twice to the strip plot.
    strip_color = kwargs.pop("color", "0.3")
    plot = sb.stripplot(
        x=x,
        y=y,
        data=data,
        ax=ax,
        jitter=jitter,
        color=strip_color,
        **kwargs
    )

    if data[y].min() >= 0:
        hide_negative_y_ticks(plot)
    if significant is not None:
        add_significance_indicator(plot=plot, significant=significant)
    return plot
figure.samplesize_auc_extent_direction.py 文件源码
项目:microbiomeHD
作者: cduvallet
项目源码
文件源码
阅读 23
收藏 0
点赞 0
评论 0
def stemplot(x, y, data, order, ax, palette, marker='o', size=7):
    """
    Wrapper to make one stemplot with colored dashed lines leading
    to colored marker.

    Parameters
    ----------
    x, y : str
        used in call to sns.stripplot() with data
    data : pandas dataframe
        Should have the values that are given for 'order' in the index,
        or a column called 'label' with those values. The caller's
        dataframe is not modified.
    order : list
        order of x values
    ax : Axis object
        axis handle to plot values on
    palette : dict
        {values in x-axis : color mapping value}
    marker : str
        marker value to pass to stripplot
    size : int
        size of marker

    Returns
    -------
    ax
    """
    if 'label' in data:
        # Re-index a copy so the caller's dataframe keeps its own index.
        data = data.copy()
        data.index = data['label']

    sns.stripplot(x=x, y=y, data=data, order=order, ax=ax, palette=palette,
                  size=size, marker=marker)
    _, stemlines, baseline = ax.stem(data.loc[order, y],
                                     markerfmt=" ", linefmt=":")
    # Remove stemplot baseline
    plt.setp(baseline, visible=False)

    # Change stem colors
    colorslist = [palette[i] for i in order]
    try:
        lines = list(stemlines)
    except TypeError:
        # Newer matplotlib returns all stems as one LineCollection, which
        # cannot be indexed or iterated; set per-stem colors on it directly.
        stemlines.set_color(colorslist)
        stemlines.set_alpha(0.75)
    else:
        # Older matplotlib returns one line object per stem.
        for line, color in zip(lines, colorslist):
            plt.setp(line, 'color', color)
        for line in lines:
            line.set_alpha(0.75)
    return ax
def plot_alphadf(alphasdf, col_order, labeldict, metric='alpha'):
    """
    Plot faceted alpha diversity.

    One facet per study, each showing a boxplot with the individual points
    overlaid as a strip plot.

    Parameters
    ----------
    alphasdf : pandas DataFrame
        columns ['study', 'alpha', 'DiseaseState']
    col_order : list
        dataset IDs in the order they should be plotted
    labeldict : dict
        dictionary with {dataset: label}
    metric : str
        alpha diversity metric, to use in labeling y axis

    Returns
    -------
    fig : Figure
    """
    # Facets per row; also decides which facets get a y label below.
    n_cols = 6

    sns.set_style('white')
    g = sns.FacetGrid(alphasdf, col='study', col_wrap=n_cols,
                      col_order=col_order, sharex=False, sharey=False)
    g = g.map(sns.boxplot, "DiseaseState", "alpha")
    # No hue is used, so the old ``split`` flag (a deprecated alias of
    # ``dodge``) had no effect and is not passed.
    g = g.map(sns.stripplot, "DiseaseState", "alpha", jitter=True,
              size=5, linewidth=0.6)

    fig = plt.gcf()
    fig.set_size_inches(14.2, 9)

    # Fix y-axis gridlines
    for i, ax in enumerate(g.axes):
        yticks = ax.get_yticks()
        # If bottom limit is between 0 and 1 (i.e. not simpson)
        if not 0 < yticks[0] < 1:
            ax.set_ylim(floor(yticks[0]), floor(yticks[-1]))
        if yticks[0] < 0:
            ax.set_ylim(0, floor(yticks[-1]))

        # Keep every other tick, re-reading them after the limit change.
        yticks = ax.get_yticks()
        if 0 < yticks[0] < 1:
            ax.set_yticks(yticks[1::2])
        else:
            ax.set_yticks(yticks[::2])

        # Need some space on the y-axis for p-values
        bottom, top = ax.get_ylim()
        ax.set_ylim(bottom, 1.2 * top)

        # Update title: the text after '=' in the current title is looked
        # up in labeldict.
        oldtitle = ax.get_title()
        ax.set_title(labeldict[oldtitle.split('=')[1].strip()])

        # Update y label on the first facet of every row
        if i % n_cols == 0:
            ax.set_ylabel(metric)

    plt.tight_layout()
    return fig
figure.ubiquity_abundance_boxplots.py 文件源码
项目:microbiomeHD
作者: cduvallet
项目源码
文件源码
阅读 25
收藏 0
点赞 0
评论 0
def plot_ubiq_abun_boxplot(tidy, metric, calculation):
    """
    Plot boxplot where x-axis is 'overall_significance' of genus, and values
    are either ubiquity or abundance in tidy (with the respective metric and
    calculation type). Only rows with patient == "total" are plotted, and
    abundance values are shown as log10.

    Parameters
    ----------
    tidy : pandas dataframe
        has columns overall_significance, value, patient, metric, and
        calculation. It is not modified.
    metric : str
        'abundance' or 'ubiquity'
    calculation: str
        'from_pooled_mean' or 'mean_of_datasets'

    Returns
    -------
    fig : Figure
    ax : Axis object
    """
    fig, ax = plt.subplots(figsize=(5.5, 4))

    # Copy the selection so the log transform below writes to an independent
    # frame rather than to a possible view of ``tidy``.
    tmp = tidy.query('metric == @metric')\
              .query('calculation == @calculation')\
              .query('patient == "total"')\
              .copy()

    # Plot log10(abundance)
    if metric == 'abundance':
        tmp['value'] = np.log10(tmp['value'])

    order = ['health', 'disease', 'mixed', 'not_sig']
    boxprops = {'edgecolor': 'k', 'facecolor': 'w'}
    lineprops = {'color': 'k'}

    sns.boxplot(data=tmp, x='overall_significance', y='value',
                fliersize=0, ax=ax, color='w', order=order,
                boxprops=boxprops, medianprops=lineprops,
                whiskerprops=lineprops, capprops=lineprops)
    # No hue is used, so the old ``split`` flag (a deprecated alias of
    # ``dodge``) had no effect and is not passed.
    sns.stripplot(data=tmp, x='overall_significance', y='value',
                  jitter=True, linewidth=0.6, ax=ax, order=order,
                  color='w')
    return fig, ax