def plot_count_fig(tasks):
    """
    Create count plot, as a 2-row x 3-col bar plot of data points for each k in each covar.

    Parameters
    ----------
    tasks: list(dict)
        Task records. Only the 'k', 'covar_type' and 'covar_tied' entries are used.

    Returns
    -------
    Matplotlib Figure object.
    """
    columns = ['k', 'covar_type', 'covar_tied']
    sns.set(context='talk', style='whitegrid')
    df = pd.DataFrame(filter_dict_list_by_keys(tasks, columns))
    # A count plot only needs these three columns. The filter above is asked
    # for nothing else, so also selecting 'bic'/'aic' here (as the code used
    # to) requests labels that are presumably absent, which raises KeyError
    # with .loc on current pandas.
    df = df.loc[:, columns]
    df['covar_type'] = [x.capitalize() for x in df['covar_type']]
    # covar_tied is used as an index into the label pair: falsy -> 'Untied', truthy -> 'Tied'.
    df['covar_tied'] = [['Untied', 'Tied'][x] for x in df['covar_tied']]
    f = sns.factorplot(x='k', kind='count', col='covar_type', row='covar_tied', data=df,
                       row_order=['Tied', 'Untied'], col_order=['Full', 'Diag', 'Spher'],
                       legend=True, legend_out=True, palette='Blues_d')
    f.set_titles("{col_name}-{row_name}")
    f.set_xlabels("Num. of Clusters (K)")
    return f.fig
python类factorplot()的实例源码
def plot_grid_scores(model, x, y=None, hue=None, row=None, col=None, col_wrap=None,
                     **kwargs):
    '''
    Plot unpacked grid-search scores through seaborn.factorplot.

    Parameters
    ----------
    model : Pipeline or Estimator
        handed to `unpack_grid_scores` to obtain the data that is plotted
    x, hue, row, col : str
        parameters grid searched over
    y : str
        the target of interest, default `'mean_'`
    col_wrap :
        forwarded unchanged to seaborn.factorplot
    **kwargs
        any further keyword arguments for seaborn.factorplot

    Returns
    -------
    g : seaborn.FacetGrid
    '''
    grid_scores = unpack_grid_scores(model)
    # A missing (or otherwise falsy) target falls back to the default column.
    if not y:
        y = 'mean_'
    facet_args = dict(row=row, col=col, col_wrap=col_wrap)
    return sns.factorplot(data=grid_scores, x=x, y=y, hue=hue, **facet_args, **kwargs)
plots.py 文件源码
项目:Comparative-Annotation-Toolkit
作者: ComparativeGenomicsToolkit
项目源码
文件源码
阅读 23
收藏 0
点赞 0
评论 0
def denovo_plot(consensus_data, ordered_genomes, denovo_tgt):
    """
    Bar plots of de-novo prediction counts, written to a PDF.

    With more than one genome in the data, genomes go on the x axis with one
    facet per 'Result' value. With a single genome, 'Result' goes on the x
    axis. Bars are split by 'Augustus mode' when exactly two modes are present.

    :param consensus_data: passed to json_biotype_nested_counter_to_df with the key 'denovo'
    :param ordered_genomes: genomes in the order they should appear on the x axis
    :param denovo_tgt: output target; only its open('w') method is used here
    """
    with denovo_tgt.open('w') as outf, PdfPages(outf) as pdf:
        df = json_biotype_nested_counter_to_df(consensus_data, 'denovo')
        # fix column names because json_biotype_nested_counter_to_df makes assumptions
        df.columns = ['Result', 'Number of transcripts', 'Augustus mode', 'genome']
        shared = {'data': df, 'y': 'Number of transcripts', 'kind': 'bar'}
        if len(set(df['Augustus mode'])) == 2:
            shared['hue'] = 'Augustus mode'
        present = set(df.genome)
        if len(present) > 1:  # if we ran in PB only, we may not have multiple genomes
            # The x-axis category order is controlled by `order`; `row_order`
            # only affects a `row` facet, which this plot does not have, so
            # the requested genome order was silently ignored before.
            # Genomes missing from ordered_genomes are appended so that no
            # data is dropped from the plot.
            genome_order = [g for g in ordered_genomes if g in present]
            genome_order += [g for g in df.genome.unique() if g not in ordered_genomes]
            ax = sns.factorplot(x='genome', col='Result', col_wrap=2, order=genome_order,
                                sharex=True, sharey=False, **shared)
        else:
            ax = sns.factorplot(x='Result', **shared)
        ax.set_xticklabels(rotation=90)
        ax.fig.suptitle('Incorporation of de-novo predictions')
        # leave head-room for the figure title
        ax.fig.subplots_adjust(top=0.9)
        multipage_close(pdf, tight_layout=False)
def _plotWeekdayStats(stats, columns, groupBy=True):
    """
    Plot one point-plot facet per stat, with weekdays on the x axis.

    :param stats: data to plot; needs a datetime 'date' column when groupBy is
        true, otherwise a ready-made 'weekday' column
    :param columns: columns from stats to plot
    :param groupBy: when true, average the data per weekday before plotting
    :return: the seaborn grid returned by factorplot
    """
    dataToPlot = stats.copy()
    # Group by weekday and rename date column
    if groupBy:
        dataToPlot = dataToPlot.groupby(stats['date'].dt.weekday).mean()
        dataToPlot = dataToPlot.reset_index().rename(columns={'date': 'weekday'})
    # change stats from columns to row attribute
    dataToPlot = pd.melt(dataToPlot, id_vars=['weekday'], value_vars=columns,
                         var_name='stats', value_name='val')
    # Rename stats and weekdays.
    # Assign the result back instead of calling replace(..., inplace=True) on
    # the selected column: the chained inplace form operates on a temporary
    # Series and does not update the DataFrame under pandas Copy-on-Write.
    dataToPlot['stats'] = dataToPlot['stats'].replace(NAMES)
    dataToPlot['weekday'] = dataToPlot['weekday'].replace(dayOfWeek)
    # Plot
    g = sns.factorplot(data=dataToPlot, x="weekday", y="val", col="stats",
                       order=dayOfWeekOrder, kind="point", sharey=False, col_wrap=3)
    g.set_xticklabels(rotation=45)
    g.set(xlabel='')
    return g
#sns.plt.show()
def plotYearAndMonthStatsSleep(stats, columns=None):
    """
    Plot aggregated (mean) stats by year and month.

    :param stats: data to plot
    :param columns: columns from stats to plot; defaults to
        ['sleep_efficiency', 'sleep_hours'] when empty or None
    :return: the seaborn grid returned by factorplot
    """
    if not columns:
        columns = ['sleep_efficiency', 'sleep_hours']
    dataToPlot = _prepareYearAndMonthStats(stats, columns)
    # Plot
    g = sns.factorplot(data=dataToPlot, x="date", y="val", row="stats", kind="point", sharey=False)
    g.set_xticklabels(rotation=45)
    for ax in g.axes.flat:
        # Pass the flag positionally: the keyword was renamed from `b` to
        # `visible` in Matplotlib 3.5, while the positional form works on
        # both older and newer releases.
        ax.grid(True)
    return g
#sns.plt.show()
def plot_aic_bic_fig(tasks):
    """
    Build the AIC-BIC figure: a 2-row x 3-col grid of point plots with 95% confidence intervals.

    Parameters
    ----------
    tasks: list(dict)

    Returns
    -------
    Matplotlib Figure object
    """
    id_columns = ['k', 'covar_type', 'covar_tied']
    metric_columns = ['aic', 'bic']
    tied_labels = ['Untied', 'Tied']

    sns.set(context='talk', style='whitegrid')
    # Keep only the keys that are plotted so the DataFrame stays small.
    records = filter_dict_list_by_keys(tasks, ['k', 'covar_type', 'covar_tied', 'bic', 'aic'])
    df = pd.DataFrame(records)
    df['covar_type'] = [name.capitalize() for name in df['covar_type']]
    # Each covar_tied value indexes the label pair: falsy -> 'Untied', truthy -> 'Tied'.
    df['covar_tied'] = [tied_labels[flag] for flag in df['covar_tied']]
    for metric in metric_columns:
        df[metric] = df[metric].astype('float')
    # Long format: one row per (k, covar_type, covar_tied, metric) observation.
    long_df = pd.melt(df, id_vars=id_columns, value_vars=metric_columns, var_name='metric')
    grid = sns.factorplot(
        data=long_df, x='k', y='value', hue='metric',
        col='covar_type', col_order=['Full', 'Diag', 'Spher'],
        row='covar_tied', row_order=['Tied', 'Untied'],
        legend=True, legend_out=True, ci=95, n_boot=100,
    )
    grid.set_titles("{col_name}-{row_name}")
    grid.set_xlabels("Num. of Clusters (K)")
    return grid.fig
def mem_svg(self, table, column, outfile):
    """
    Draw a bar chart of the maximum memory value per Dataset/Threads/Program
    group and save it to `outfile`.

    `column` names the memory column of `table`. Its per-group maximum is
    divided by 1,000,000 and plotted on an axis labelled 'Memory (MB)'. With a
    single distinct thread count the bars are per program; otherwise they are
    per thread count, coloured by program.
    """
    # Select the non-interactive backend before pyplot is imported.
    import matplotlib
    matplotlib.use('Agg')
    import matplotlib.pyplot as plt
    import seaborn as sb
    sb.set(style="whitegrid")

    renamed = table.rename(columns={ column : 'Memory' })
    grouped = renamed.groupby(['Dataset', 'Threads', 'Program'])
    svgdat = grouped.agg({ 'Memory' : max }).reset_index()
    svgdat = svgdat.assign(MemoryMB=svgdat['Memory'] / 1000000)

    single_thread_count = len(svgdat.Threads.unique()) == 1
    common = dict(y='MemoryMB', col="Dataset", data=svgdat, kind="bar", ci=None, sharey=True)
    if single_thread_count:
        plot = sb.factorplot(x='Program', **common)
        plot = plot.set_titles('')
    else:
        plot = sb.factorplot(x='Threads', hue="Program", **common)
    # NOTE(review): the x label is 'Threads' even when the x axis shows
    # programs (single thread count) -- confirm this is intended.
    plot = plot.set_xlabels('Threads')
    plot = plot.set_ylabels('Memory (MB)')
    plot = plot.set_xticklabels(rotation=90)
    plot.fig.subplots_adjust(wspace=0.35)
    plot.savefig(outfile)
plots.py 文件源码
项目:Comparative-Annotation-Toolkit
作者: ComparativeGenomicsToolkit
项目源码
文件源码
阅读 26
收藏 0
点赞 0
评论 0
def consensus_support_plot(consensus_data, ordered_genomes, biotypes, modes, title, tgt):
    """
    grouped violin plots of original intron / intron annotation / exon annotation support

    Writes one page for all data, then one page per biotype for which
    biotype_filter returns data.

    :param consensus_data: passed to json_to_df_with_biotype once per mode
    :param ordered_genomes: genomes in the order they should appear on the x axis
    :param biotypes: biotypes to produce an extra page for
    :param modes: value columns to plot; each becomes one 'variable' level
    :param title: title of the first page; per-biotype pages append ' for <biotype>'
    :param tgt: output target; only its open('w') method is used here
    """
    def adjust_plot(g, this_title):
        g.set_xticklabels(rotation=90)
        g.fig.suptitle(this_title)
        g.fig.subplots_adjust(top=0.9)
        for ax in g.axes.flat:
            ax.set_ylabel('Percent supported')
            ax.set_ylim(-1, 101)

    def support_grid(data):
        """Draw the violin grid for one frame, faceted by mode when there are several genomes."""
        if len(ordered_genomes) > 1:
            # `order` sets the x-axis category order. `row_order` only applies
            # to a `row` facet, which this plot lacks, so the genome order was
            # previously ignored. Genomes missing from ordered_genomes are
            # appended so that no data is dropped.
            present = set(data['genome'])
            genome_order = [g for g in ordered_genomes if g in present]
            genome_order += [g for g in data['genome'].unique() if g not in ordered_genomes]
            return sns.factorplot(data=data, y='value', x='genome', col='variable', col_wrap=2,
                                  kind='violin', sharex=True, sharey=True,
                                  order=genome_order or None, cut=0)
        return sns.factorplot(data=data, y='value', x='variable', kind='violin', sharex=True,
                              sharey=True, cut=0)

    dfs = []
    for i, mode in enumerate(modes):
        df = json_to_df_with_biotype(consensus_data, mode)
        # keep the full frame for the first mode, only the value column for the rest
        if i > 0:
            df = df[mode]
        dfs.append(df)
    df = pd.concat(dfs, axis=1)
    df = pd.melt(df, value_vars=modes, id_vars=['genome', 'biotype'])
    with tgt.open('w') as outf, PdfPages(outf) as pdf:
        g = support_grid(df)
        adjust_plot(g, title)
        multipage_close(pdf, tight_layout=False)
        title += ' for {}'
        for biotype in biotypes:
            this_title = title.format(biotype)
            biotype_df = biotype_filter(df, biotype)
            if biotype_df is not None:
                # Always plot the biotype subset: the single-genome branch used
                # to plot the unfiltered frame under a per-biotype title.
                g = support_grid(biotype_df)
                adjust_plot(g, this_title)
                multipage_close(pdf, tight_layout=False)
plots.py 文件源码
项目:Comparative-Annotation-Toolkit
作者: ComparativeGenomicsToolkit
项目源码
文件源码
阅读 20
收藏 0
点赞 0
评论 0
def tm_gene_family_plot(tm_data, ordered_genomes, biotypes, gene_family_tgt):
    """
    transMap gene family collapse plots.

    Writes one bar-plot page of summed counts per genome, then one page per
    biotype for which biotype_filter returns data. When the input frame cannot
    be built (ValueError), an empty output file is created instead.
    """
    collapse = 'Gene Family Collapse'

    def draw_page(frame, page_title, pdf):
        """Plot one frame as a bar grid with one facet per genome and close the page."""
        grid = sns.factorplot(data=frame, x=collapse, y='count', col='genome', kind='bar',
                              col_order=ordered_genomes, col_wrap=4)
        grid.fig.suptitle(page_title)
        grid.set_xlabels('Number of genes collapsed to one locus')
        grid.set_ylabels('Number of genes')
        multipage_close(pdf)

    try:
        df = json_biotype_nested_counter_to_df(tm_data, collapse)
    except ValueError:  # no gene family collapse. probably the test set.
        # still create the (empty) target file
        with gene_family_tgt.open('w') as outf:
            pass
        return

    df[collapse] = pd.to_numeric(df[collapse])
    per_genome = df[[collapse, 'genome', 'count']].groupby(['genome', collapse])
    tot_df = per_genome.aggregate(sum).reset_index()
    tot_df = tot_df.sort_values(collapse)

    with gene_family_tgt.open('w') as outf, PdfPages(outf) as pdf:
        draw_page(tot_df, 'Number of genes collapsed during gene family collapse', pdf)
        for biotype in biotypes:
            subset = biotype_filter(df, biotype)
            if subset is not None:
                draw_page(subset.sort_values(collapse),
                          'Number of genes collapsed during gene family collapse for {}'.format(biotype),
                          pdf)
plots.py 文件源码
项目:Comparative-Annotation-Toolkit
作者: ComparativeGenomicsToolkit
项目源码
文件源码
阅读 27
收藏 0
点赞 0
评论 0
def pb_support_plot(consensus_data, ordered_genomes, pb_genomes, pb_support_tgt):
    """
    Bar plot of IsoSeq transcript validation counts per genome, written to a PDF.

    :param consensus_data: passed to json_biotype_counter_to_df with the key
        'IsoSeq Transcript Validation'
    :param ordered_genomes: all genomes, in display order
    :param pb_genomes: genomes to order first on the x axis, following ordered_genomes
    :param pb_support_tgt: output target; only its open('w') method is used here
    """
    with pb_support_tgt.open('w') as outf, PdfPages(outf) as pdf:
        pb_genomes = [x for x in ordered_genomes if x in pb_genomes]  # fix order
        df = json_biotype_counter_to_df(consensus_data, 'IsoSeq Transcript Validation')
        df.columns = ['IsoSeq Transcript Validation', 'Number of transcripts', 'genome']
        # `order` sets the x-axis category order. `row_order` only applies to a
        # `row` facet, which this plot lacks, so the fixed order above was never
        # used. Genomes in the data but not in pb_genomes are appended so that
        # no data is dropped.
        present = set(df['genome'])
        genome_order = [g for g in pb_genomes if g in present]
        genome_order += [g for g in df['genome'].unique() if g not in pb_genomes]
        ax = sns.factorplot(data=df, x='genome', y='Number of transcripts',
                            hue='IsoSeq Transcript Validation', kind='bar',
                            order=genome_order or None)
        ax.set_xticklabels(rotation=90)
        ax.fig.suptitle('Isoforms validated by at least one IsoSeq read')
        multipage_close(pdf, tight_layout=False)
plots.py 文件源码
项目:Comparative-Annotation-Toolkit
作者: ComparativeGenomicsToolkit
项目源码
文件源码
阅读 23
收藏 0
点赞 0
评论 0
def indel_plot(consensus_data, ordered_genomes, indel_plot_tgt):
    """
    Bar plots of coding indel values per genome for the transMap and consensus
    transcript sets, written to a PDF.

    :param consensus_data: per-genome mapping; the 'transMap Indels' and
        'Consensus Indels' entries of every genome in ordered_genomes are read
    :param ordered_genomes: genomes to plot, in x-axis order
    :param indel_plot_tgt: output target; only its open('w') method is used here
    """
    def indel_frame(key, label):
        """Build one row per genome from consensus_data[genome][key], tagged with `label`."""
        frame = pd.concat([pd.DataFrame.from_dict(consensus_data[genome][key], orient='index').T
                           for genome in ordered_genomes])
        # rows were concatenated in ordered_genomes order, so this lines up positionally
        frame['genome'] = ordered_genomes
        frame['transcript set'] = [label] * len(frame)
        return frame

    with indel_plot_tgt.open('w') as outf, PdfPages(outf) as pdf:
        tm_df = indel_frame('transMap Indels', 'transMap')
        consensus_df = indel_frame('Consensus Indels', 'Consensus')
        df = pd.concat([consensus_df, tm_df])
        df = pd.melt(df, id_vars=['genome', 'transcript set'],
                     value_vars=['CodingDeletion', 'CodingInsertion', 'CodingMult3Indel'])
        df.columns = ['Genome', 'Transcript set', 'Type', 'Percent of transcripts']
        # `order` sets the x-axis category order; `row_order` only applies to a
        # `row` facet, which this plot does not have.
        g = sns.factorplot(data=df, x='Genome', y='Percent of transcripts', col='Transcript set',
                           hue='Type', kind='bar', order=ordered_genomes,
                           col_order=['transMap', 'Consensus'])
        g.set_xticklabels(rotation=90)
        # leave head-room for the figure title
        g.fig.subplots_adjust(top=.8)
        g.fig.suptitle('Coding indels')
        multipage_close(pdf, tight_layout=False)
###
# shared plotting functions
###
def plot_models(accs):
    """
    Draw a bar plot of score per model, with one bar colour per pipeline.

    `accs` is turned into a DataFrame with the columns 'model', 'pipeline'
    and 'score'. Nothing is returned.
    """
    column_names = ['model', 'pipeline', 'score']
    scores = pd.DataFrame(accs, columns=column_names)
    sns.factorplot(data=scores, x='model', y='score', hue='pipeline', kind='bar')
def two_way_factor_plot(dataFrame, x, y, col, row):
    """
    Draw a bar-kind factorplot of `y` against `x`, faceted by `col` and `row`.

    All five arguments are forwarded to seaborn.factorplot; the styling
    (margin titles, facet size/aspect, "Set3" palette) is fixed. Nothing is
    returned.
    """
    facet_options = dict(col=col, row=row, margin_titles=True, size=3, aspect=.8)
    sns.factorplot(data=dataFrame, x=x, y=y, kind='bar', palette="Set3", **facet_options)
data_manag&visualization.py 文件源码
项目:-Python-Analysis_of_wine_quality
作者: ekolik
项目源码
文件源码
阅读 18
收藏 0
点赞 0
评论 0
def factorplots(wine_set):
    """
    Show a strip-kind factorplot of alcohol against quality for `wine_set`.

    The title says "red" when `wine_set` equals the module-level `red` frame
    and "white" otherwise.
    """
    seaborn.factorplot(data=wine_set, x="quality", y="alcohol", kind="strip")
    plt.xlabel("Quality level of wine, 0-10 scale")
    plt.ylabel("Alcohol level in wine, % ABV")
    is_red = wine_set.equals(red)
    plt.title("Alcohol percent in each level of red wine's quality" if is_red
              else "Alcohol percent in each level of white wine's quality")
    plt.show()
def _plotMonthlyStats(stats, columns, groupBy=True):
    """
    Plot one bar-plot facet per stat, with months on the x axis.

    :param stats: data to plot; needs a datetime 'date' column when groupBy is
        true, otherwise a ready-made 'month' column
    :param columns: columns from stats to plot
    :param groupBy: when true, average the data per month before plotting
    :return: the seaborn grid returned by factorplot
    """
    dataToPlot = stats.copy()
    # Group by month and rename date column
    if groupBy:
        dataToPlot = dataToPlot.groupby(stats['date'].dt.month).mean()
        dataToPlot = dataToPlot.reset_index().rename(columns={'date': 'month'})
    # change stats from columns to row attribute
    dataToPlot = pd.melt(dataToPlot, id_vars=['month'], value_vars=columns,
                         var_name='stats', value_name='val')
    # Rename stats and months.
    # Assign the result back instead of calling replace(..., inplace=True) on
    # the selected column: the chained inplace form operates on a temporary
    # Series and does not update the DataFrame under pandas Copy-on-Write.
    dataToPlot['stats'] = dataToPlot['stats'].replace(NAMES)
    dataToPlot['month'] = dataToPlot['month'].replace(months)
    # Only keep the months that actually occur, in calendar order.
    present = set(dataToPlot['month'].unique())
    order = [m for m in monthsOrder if m in present]
    # Plot
    g = sns.factorplot(data=dataToPlot, x="month", y="val", col="stats", order=order,
                       kind="bar", sharey=False)
    g.set_xticklabels(rotation=45)
    g.set(xlabel='')
    return g
#sns.plt.show()
# def _plotMonthlyStats(stats, columns):
# """
# Plot aggregated (mean) stats by month
# :param stats: data to plot
# :param columns: columns from stats to plot
# """
# MEASURE_NAME = 'month'
# months={1:'Jan', 2:'Feb', 3:'Mar', 4:'Apr', 5:'May', 6:'Jun', 7:'Jul', 8:'Aug',
# 9:'Sep', 10:'Oct', 11:'Nov', 12:'Dec'}
# order = ['Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Sep','Oct','Nov','Dec']
# stats[MEASURE_NAME] = stats[MEASURE_NAME].map(months)
#
# order = [m for m in order if m in stats[MEASURE_NAME].unique()]
#
# f, axes = getAxes(2,2)
# for i, c in enumerate(columns):
# if c in NAMES:
# c = NAMES[c]
# g = sns.barplot(x=MEASURE_NAME, y=c, data=stats, order=order, ax=axes[i])
# g.set_xlabel('')
# sns.plt.show()