def scoreHists(scoresFN,outFN,numBins,geneNames,scoreType):
    '''Write one histogram page per pairwise comparison in a scores file.

    The scores are loaded with readScorePairs; every key of the mapping it
    returns gets its own figure: a histogram of that key's values with
    numBins bins, titled with the key's elements joined by '-'. All pages
    are appended to the PDF file outFN.
    '''
    # NOTE: this still seems to need a display for interactive plots;
    # it would be nice to make it run without one.
    scoresByPair = readScorePairs(scoresFN,geneNames,scoreType)
    pyplot.ioff() # turn off interactive mode
    with PdfPages(outFN) as pdf:
        for pairKey in scoresByPair:
            pairScores = scoresByPair[pairKey]
            pageTitle = '-'.join(pairKey)
            pyplot.figure()
            pyplot.hist(pairScores,bins=numBins)
            pyplot.title(pageTitle)
            # savefig()/close() act on the current figure, i.e. the one just made
            pdf.savefig()
            pyplot.close()
python类PdfPages()的实例源码
def plot(self, dataset, path, show=False):
    """Plot the data set together with the fitted polynomial and save it.

    The 'T' column of dataset.data is used as x values and the column
    named by self.symbol as y values. The polynomial given by self._coeffs
    is evaluated on 80 evenly spaced points spanning the x range.

    :param dataset: object whose ``data`` attribute is indexable by column
      name and whose columns provide ``tolist()``.
    :param path: location of the PDF file to write.
    :param show: when true, open the written file with ``webbrowser``.
    """
    with PdfPages(path) as pdf:
        temperatures = dataset.data['T'].tolist()
        measured = dataset.data[self.symbol].tolist()
        plt.plot(temperatures, measured, 'ro', alpha=0.4, markersize=4)

        fit_x = np.linspace(min(temperatures), max(temperatures), 80)
        fit_y = np.polyval(self._coeffs, fit_x)
        plt.plot(fit_x, fit_y, linewidth=0.3, label='')

        plt.ticklabel_format(axis='y', style='sci', scilimits=(0, 4))
        plt.legend(loc=3, bbox_to_anchor=(0, 0.8))
        plt.title('$%s$ vs $T$' % self.display_symbol)
        plt.xlabel('$T$ (K)')
        plt.ylabel('$%s$ (%s)' % (self.display_symbol, self.units))

        pdf.savefig(plt.gcf())
        plt.close()

    # the PDF has been finalised once the ``with`` block is left
    if show:
        webbrowser.open_new(path)
def generate_initial_report(df, plot_size_scalar, report_name):
    '''
    Write a two-page PDF report about a dataframe: a scatter matrix
    (with KDE plots on the diagonal) followed by the figure returned by
    box_plots.

    df: the dataframe to plot
    plot_size_scalar: the scatter matrix figure is a square whose side is
        (number of columns * plot_size_scalar); also forwarded to box_plots
    report_name: file name (without '.pdf') created under OUTPUT_DIR
    '''
    # scatter_matrix lives in pd.plotting on current pandas; fall back to the
    # legacy pd.tools.plotting location for old installations.
    try:
        scatter_matrix = pd.plotting.scatter_matrix
    except AttributeError:
        scatter_matrix = pd.tools.plotting.scatter_matrix

    # the context manager guarantees the PDF is closed even if plotting fails
    with PdfPages(OUTPUT_DIR + report_name + '.pdf') as report:
        # Generate a scatter matrix
        c_count = len(df.columns)
        side = c_count*plot_size_scalar
        scatter_fig, ax_list = plt.subplots(figsize=(side, side))
        scatter_matrix(df, diagonal='kde', ax=ax_list)
        report.savefig(scatter_fig)

        # Generate the box/violin overlay plot
        box_fig = box_plots(df, plot_size_scalar, plot_size_scalar*1.5)
        report.savefig(box_fig)
def build_regression_report(report_name, relevant_col_names, training_data, training_answers):
    '''
    Given a report_name, a list of columns to regress on, and the required training_data
    create a regression model with build_model. The model.summary() text is written to
    <report_name>.txt and the figure returned by plot_resid is written to <report_name>.pdf,
    both under OUTPUT_DIR.
    report_name: The name of the pdf (and text file), without extension
    relevant_col_names: a list with the columns you care about in training_data
    training_data: the training set
    training_answers: y, assumed to be parallel to training_data
    '''
    reduced_dataset = training_data.filter(relevant_col_names)
    model, data = build_model(training_answers, reduced_dataset)

    with open(OUTPUT_DIR + report_name + ".txt", "w") as text_file:
        text_file.write(str(model.summary()))

    resid_fig = plot_resid(model, data)
    # Open the PDF only once there is something to save, and let the context
    # manager close it so no handle leaks if saving fails.
    with PdfPages(OUTPUT_DIR + report_name + '.pdf') as report:
        report.savefig(resid_fig)
plots.py 文件源码
项目:Comparative-Annotation-Toolkit
作者: ComparativeGenomicsToolkit
项目源码
文件源码
阅读 20
收藏 0
点赞 0
评论 0
def missing_rate_plot(consensus_data, ordered_genomes, biotypes, missing_plot_tgt):
    """Bar plots of missing genes/transcripts: one overall page, then one page per biotype.

    Biotypes for which biotype_filter returns None are skipped.
    """
    base_title = 'Number of missing orthologs in consensus set'
    ylabel = 'Number of genes or transcripts'

    genes = json_biotype_counter_to_df(consensus_data, 'Gene Missing')
    genes.columns = ['biotype', 'Genes', 'genome']
    transcripts = json_biotype_counter_to_df(consensus_data, 'Transcript Missing')
    transcripts.columns = ['biotype', 'Transcripts', 'genome']
    merged = transcripts.merge(genes, on=['genome', 'biotype'])
    df = pd.melt(merged, id_vars=['biotype', 'genome'])

    with missing_plot_tgt.open('w') as outf, PdfPages(outf) as pdf:
        # overall page, summed per genome / biotype / variable
        totals = df.groupby(['genome', 'biotype', 'variable']).aggregate(sum).reset_index()
        generic_barplot(totals, pdf, '', ylabel, base_title, x='genome', y='value',
                        col='variable', row_order=ordered_genomes)
        for biotype in biotypes:
            per_biotype = biotype_filter(df, biotype)
            if per_biotype is not None:
                per_biotype = per_biotype.groupby(['genome', 'variable']).aggregate(sum).reset_index()
                page_title = base_title + ' for biotype {}'.format(biotype)
                generic_barplot(per_biotype, pdf, '', ylabel, page_title, x='genome', y='value',
                                col='variable', row_order=ordered_genomes)
def printPDF(self):
    """Ask for a file name and save self.dyfig and self.dyfig2 into one PDF.

    Nothing happens when no file name is chosen. While the file is being
    written the application shows a wait cursor; self.signalPrintEnd is
    set once the PDF has been written and closed successfully.
    """
    dlg = QFileDialog()
    pdf_FileName = dlg.getSaveFileName(self,"Save as PDF","","*.pdf")
    # Some Qt bindings return a (file name, selected filter) pair instead of
    # the bare file name; accept both forms.
    if isinstance(pdf_FileName, tuple):
        pdf_FileName = pdf_FileName[0]
    if not pdf_FileName:
        return

    QApplication.setOverrideCursor(Qt.WaitCursor)
    try:
        with PdfPages(pdf_FileName) as pdf:
            pdf.savefig(self.dyfig)
            pdf.savefig(self.dyfig2)
        self.signalPrintEnd.set()
    finally:
        # always give the normal cursor back, even if saving failed
        QApplication.restoreOverrideCursor()
def make_plots(self):
    """Create the plots for this object, then for each of its selections.

    When plots are requested and the scoring method is not "counts", one
    multi-page PDF of sequence-function maps is written into self.plot_dir
    per available label: "synonymous" (coding=True) and "variants"
    (coding=False), with one sfmap_wrapper call per child condition.
    make_plots() is then called on every item of self.selection_list().
    """
    if self.plots_requested:
        logging.info("Creating plots", extra={'oname': self.name})

        # sequence-function maps
        if self.scoring_method != "counts":
            sfmap_outputs = (
                ("synonymous", "sequence_function_map_aa.pdf", True),
                ("variants", "sequence_function_map_nt.pdf", False),
            )
            for label, file_name, coding in sfmap_outputs:
                if label not in self.labels:
                    continue
                # the context manager closes the PDF even if a plot fails
                with PdfPages(os.path.join(self.plot_dir, file_name)) as pdf:
                    for condition in self.children:
                        self.sfmap_wrapper(condition=condition.name, pdf=pdf,
                                           coding=coding)

    for s in self.selection_list():
        s.make_plots()
def main(args):
    """Write one difference histogram per V gene into the PDF args.pdf.

    Rows with J_SHM != 0 are dropped unless args.ignore_J is set. Genes
    with fewer rows than the minimum group size are skipped; when
    args.minimum_group_size is None the minimum is a thousandth of the
    remaining rows, capped at 100.
    """
    table = read_table(args.table)
    logger.info('%s rows read', len(table))
    if not args.ignore_J:
        # Discard rows with any mutation within J at all
        table = table[table.J_SHM == 0][:]
        logger.info('%s rows remain after discarding J%%SHM > 0', len(table))

    minimum_group_size = args.minimum_group_size
    if minimum_group_size is None:
        minimum_group_size = min(len(table) // 1000, 100)
        logger.info('Skipping genes with less than %s assignments', minimum_group_size)

    plotted = 0
    skipped = 0
    with PdfPages(args.pdf) as pages:
        for gene, group in table.groupby('V_gene'):
            if len(group) < minimum_group_size:
                skipped += 1
                continue
            fig = plot_difference_histogram(group, gene)
            plotted += 1
            FigureCanvasPdf(fig).print_figure(pages, bbox_inches='tight')
    logger.info('%s plots created (%s skipped because of too few sequences)', plotted, skipped)
def export_results_dialog(self):
    """
    Bring up transient dialog for exporting results.

    The chosen path is created as a directory and filled with
    fit_param.csv, main_plot.pdf, corner_plot.pdf and session.log.
    Cancelling the dialog does nothing. Any failure during the export is
    reported to the user in a warning box instead of being raised.
    """
    out_dir, _ = QW.QFileDialog.getSaveFileName(self, "Export Experiment Output", "", "*")
    if not out_dir:
        # dialog was cancelled: there is nothing to export and nothing to warn about
        return

    try:
        os.mkdir(out_dir)

        with open(os.path.join(out_dir,"fit_param.csv"), "w") as data_file:
            data_file.write(self._fit.fitter.fit_as_csv)

        with PdfPages(os.path.join(out_dir,"main_plot.pdf")) as plot_save:
            fig, ax = self._fit.fitter.plot()
            plot_save.savefig(fig)

        with PdfPages(os.path.join(out_dir,"corner_plot.pdf")) as plot_save:
            fig = self._fit.fitter.corner_plot()
            plot_save.savefig(fig)

        with open(os.path.join(out_dir,"session.log"),"w") as log_save:
            log_save.write(self._main_widgets.message_box.toPlainText())

    except Exception as ex:
        template = "An exception of type {0} occurred. Arguments:\n{1!r}"
        err = template.format(type(ex).__name__,ex.args)
        # QMessageBox.warning takes (parent, title, text, buttons); the message
        # goes in the text slot, not the title slot.
        QW.QMessageBox.warning(self, "warning", err, QW.QMessageBox.Ok)
two_sigma_financial_modelling.py 文件源码
项目:PortfolioTimeSeriesAnalysis
作者: MizioAnd
项目源码
文件源码
阅读 27
收藏 0
点赞 0
评论 0
def multipage(filename, figs=None):
    """Save several figures into one multi-page PDF, one figure per page.

    filename: path of the PDF to write.
    figs: iterable of figures to save; when None, every figure currently
        registered with pyplot is saved, in figure-number order.
    """
    if figs is None:
        figs = [plt.figure(n) for n in plt.get_fignums()]
    # the context manager closes the PDF even when a savefig call fails
    with PdfPages(filename) as pp:
        for fig in figs:
            fig.savefig(pp, format='pdf')
def plot_2d_histogram(hist, x_lim, y_lim, title, x_label, y_label, pdf_file_name):
    """Plot 2d histogram with matplotlib

    :param hist: input numpy histogram = x_bin_edges, y_bin_edges, bin_entries_2dgrid
    :param tuple x_lim: range tuple of x-axis (min,max)
    :param tuple y_lim: range tuple of y-axis (min,max)
    :param str title: title of plot
    :param str x_label: Label for histogram x-axis
    :param str y_label: Label for histogram y-axis
    :param str pdf_file_name: if set, will store the plot in a pdf file
    :raises ValueError: if the three histogram components cannot be extracted from hist
    """
    # Validate the input first so that a bad histogram neither creates a
    # figure nor triggers the matplotlib import. Only ordinary errors are
    # translated; KeyboardInterrupt/SystemExit propagate untouched.
    try:
        x_ranges = hist[0]
        y_ranges = hist[1]
        grid = hist[2]
    except Exception:
        raise ValueError('Cannot extract ranges and grid from input histogram')

    # import matplotlib here to prevent import before setting backend in
    # core.execution.run_eskapade
    import matplotlib.pyplot as plt
    from matplotlib.backends.backend_pdf import PdfPages

    plt.figure(figsize=(7, 5))

    ax = plt.gca()
    ax.pcolormesh(x_ranges, y_ranges, grid)
    ax.set_ylim(y_lim)
    ax.set_xlim(x_lim)
    ax.set_title(title)

    plt.xlabel(x_label, fontsize=14)
    plt.ylabel(y_label, fontsize=14)
    plt.grid()

    if pdf_file_name:
        # the context manager closes the PDF even if saving fails
        with PdfPages(pdf_file_name) as pdf_file:
            plt.savefig(pdf_file, format='pdf', bbox_inches='tight', pad_inches=0)
        plt.close()
def save(self, image_file):
    """Write this object's figure to image_file at 75 dpi.

    image_file is handed to the figure's savefig unchanged.
    """
    figure = self.fig
    figure.savefig(image_file, dpi=75)
def save(self, image_file):
    """Save the figure held in self.fig to image_file with dpi=75.

    The target is passed straight through to the figure's savefig.
    """
    target_figure = self.fig
    target_figure.savefig(image_file, dpi=75)
def save(self, image_file):
    """Store self.fig in image_file, rendered at 75 dpi.

    No PDF wrapper is involved; the figure's own savefig does the work.
    """
    fig_to_save = self.fig
    fig_to_save.savefig(image_file, dpi=75)
def get_complete_output(reference_frames,output_frames,mode,pr_resolution,outdir):
    """Compute, plot and log the precision/recall curve of one layer.

    The curve comes from get_pr. It is drawn on a 7x7 inch figure with the
    AUC in the legend and saved as 'curve_<layer>.pdf' in outdir; the raw
    points are written to 'log_<layer>.tsv' in outdir, where <layer> is
    mode_names[mode].

    Returns the (pr_x, pr_y, pr_AUC) triple produced by get_pr.
    """
    layer_name = mode_names[mode]
    print ("Processing Layer: %s" % layer_name)
    start_time = time.time()
    pr_x, pr_y, pr_AUC= get_pr(reference_frames,output_frames,mode,pr_resolution)

    # create a plot
    plt.plot(pr_x,pr_y)
    plt.title(layer_name)
    plt.xlabel('recall')
    plt.ylabel('precision')
    plt.grid()
    ax = plt.gca()
    ax.set_ylim([-0.05, 1.05])
    ax.set_xlim([-0.05, 1.05])
    try:
        ax.set(adjustable='box-forced', aspect='equal')
    except ValueError:
        # NOTE(review): newer matplotlib releases reject 'box-forced';
        # 'box' is the documented replacement - confirm against the
        # matplotlib version in use.
        ax.set(adjustable='box', aspect='equal')
    fig = plt.gcf()
    fig.set_size_inches(7, 7)
    plt.legend(["AUC=%.3f" % (pr_AUC)], loc='upper right')

    try:
        with PdfPages(os.path.join(outdir,'curve_'+layer_name+'.pdf')) as pp:
            pp.savefig(fig)
    finally:
        # always drop the figure: plt.plot above draws on the *current*
        # figure, so a leftover one would bleed into the next layer's curve
        plt.close()

    # save complete log
    arr = np.array([pr_x,pr_y])
    np.savetxt(os.path.join(outdir,'log_'+layer_name+'.tsv'), np.transpose(arr), fmt='%.8f', delimiter="\t", header="recall\tprecision", comments='')
    print("AUC = %.3f" % pr_AUC)
    print("Done --- %s seconds ---" % (time.time() - start_time))
    return pr_x, pr_y, pr_AUC
################################################################################
# MAIN
################################################################################
def setUpClass(cls):
    """Run the parent set-up, then open the PDF used by this test class.

    The file is named cls.moduleName + '.pdf' and the open PdfPages object
    is stored as cls.pdf; it is not closed here.
    """
    super(LocalQasmSimulatorTest, cls).setUpClass()
    pdf_name = cls.moduleName + '.pdf'
    cls.pdf = PdfPages(pdf_name)
def write_score(name, gold_labels, pred_scores, classes, average_classes):
    """Write a text score report and a ROC-curve PDF for a classifier.

    '<name>.txt' is opened through Tee while the per-class
    precision/recall/F1, the accuracy and the mean F1 over average_classes
    are printed. '<name>.pdf' receives one page with the micro-averaged ROC
    curve plus one curve per class, labelled by class index.
    The predicted label of a row is the class with the highest score.
    """
    classes = np.array(classes)
    average_classes = np.array(average_classes)
    gold_scores = LabelBinarizer().fit(classes).transform(gold_labels)
    pred_labels = classes[np.argmax(pred_scores, axis=1)]

    with closing(Tee('{}.txt'.format(name), 'w')):
        precision, recall, fscore, _ = precision_recall_fscore_support(gold_labels, pred_labels, labels=classes)
        for per_class in zip(classes, precision, recall, fscore):
            print('{}: P={:.2f}, R={:.2f}, F1={:.2f}'.format(*per_class))
        print('Accuracy: {:.4f}'.format(accuracy_score(gold_labels, pred_labels)))
        averaged_idx = LabelEncoder().fit(classes).transform(average_classes)
        print('F1 average: {:.4f}'.format(np.mean(fscore[averaged_idx])))

    with PdfPages('{}.pdf'.format(name)) as pdf:
        fpr, tpr, roc_auc = {}, {}, {}
        n_classes = len(classes)
        for idx in range(n_classes):
            fpr[idx], tpr[idx], _ = roc_curve(gold_scores[:, idx], pred_scores[:, idx])
            roc_auc[idx] = auc(fpr[idx], tpr[idx])
        fpr['micro'], tpr['micro'], _ = roc_curve(gold_scores.ravel(), pred_scores.ravel())
        roc_auc['micro'] = auc(fpr['micro'], tpr['micro'])

        plt.figure()
        plt.plot(fpr['micro'], tpr['micro'], label='micro-average (area = {:.2f})'.format(roc_auc['micro']))
        for idx in range(n_classes):
            plt.plot(fpr[idx], tpr[idx], label='{0} (area = {1:.2f})'.format(idx, roc_auc[idx]))
        plt.plot([0, 1], [0, 1], 'k--')  # chance diagonal
        plt.xlim([0.0, 1.0])
        plt.ylim([0.0, 1.05])
        plt.xlabel('False Positive Rate')
        plt.ylabel('True Positive Rate')
        plt.title('ROC Curves')
        plt.legend(loc='lower right')
        pdf.savefig()
def multipage(filename, figs=None):
    """Save several figures into one multi-page PDF, one figure per page.

    filename: path of the PDF to write.
    figs: iterable of figures to save; when None, every figure currently
        registered with pyplot is saved, in figure-number order.
    """
    if figs is None:
        figs = [plt.figure(n) for n in plt.get_fignums()]
    # the context manager closes the PDF even when a savefig call fails
    with PdfPages(filename) as pp:
        for fig in figs:
            fig.savefig(pp, format='pdf')
plots.py 文件源码
项目:Comparative-Annotation-Toolkit
作者: ComparativeGenomicsToolkit
项目源码
文件源码
阅读 29
收藏 0
点赞 0
评论 0
def consensus_support_plot(consensus_data, ordered_genomes, biotypes, modes, title, tgt):
    """grouped violin plots of original intron / intron annotation / exon annotation support

    One page is drawn for all data, followed by one page per biotype for
    which biotype_filter returns data. With more than one genome the plots
    are faceted by mode with genomes on the x axis; with a single genome the
    modes themselves are on the x axis.
    """
    def adjust_plot(g, this_title):
        g.set_xticklabels(rotation=90)
        g.fig.suptitle(this_title)
        g.fig.subplots_adjust(top=0.9)
        for ax in g.axes.flat:
            ax.set_ylabel('Percent supported')
            ax.set_ylim(-1, 101)

    def violin_page(pdf, data, this_title):
        # draw `data` as one page of the PDF
        if len(ordered_genomes) > 1:
            g = sns.factorplot(data=data, y='value', x='genome', col='variable', col_wrap=2, kind='violin',
                               sharex=True, sharey=True, row_order=ordered_genomes, cut=0)
        else:
            g = sns.factorplot(data=data, y='value', x='variable', kind='violin', sharex=True,
                               sharey=True, row_order=ordered_genomes, cut=0)
        adjust_plot(g, this_title)
        multipage_close(pdf, tight_layout=False)

    dfs = []
    for i, mode in enumerate(modes):
        df = json_to_df_with_biotype(consensus_data, mode)
        if i > 0:
            # only the first frame keeps its full set of columns; later ones
            # contribute just their own mode column
            df = df[mode]
        dfs.append(df)
    df = pd.concat(dfs, axis=1)
    df = pd.melt(df, value_vars=modes, id_vars=['genome', 'biotype'])
    with tgt.open('w') as outf, PdfPages(outf) as pdf:
        violin_page(pdf, df, title)
        biotype_title = title + ' for {}'
        for biotype in biotypes:
            biotype_df = biotype_filter(df, biotype)
            if biotype_df is not None:
                # the biotype page must show the filtered frame in both the
                # multi-genome and the single-genome layout
                violin_page(pdf, biotype_df, biotype_title.format(biotype))
plots.py 文件源码
项目:Comparative-Annotation-Toolkit
作者: ComparativeGenomicsToolkit
项目源码
文件源码
阅读 24
收藏 0
点赞 0
评论 0
def tm_para_plot(tm_data, ordered_genomes, biotypes, para_tgt):
    """transMap paralogy plots

    Counts are bucketed per biotype and genome into 1, 2, 3 and >= 4
    alignments. One page shows the totals per genome, then one page is
    written per biotype for which biotype_filter returns data.
    """
    legend_labels = ['= 1', '= 2', '= 3', u'\u2265 4']
    title_string = 'Proportion of transcripts that have multiple alignments'
    biotype_title_string = 'Proportion of {} transcripts that have multiple alignments'
    box_label = 'Number of\nalignments'

    df = json_biotype_nested_counter_to_df(tm_data, 'Paralogy')
    df['Paralogy'] = pd.to_numeric(df['Paralogy'])
    # make sure genomes are in order
    df['genome'] = pd.Categorical(df['genome'], ordered_genomes, ordered=True)
    df = df.sort_values('genome')

    # we want a dataframe where each row is the counts, in genome order;
    # the rows are collected first and turned into a frame afterwards
    rows = []
    for biotype, biotype_df in df.groupby('biotype'):
        for genome, genome_df in biotype_df.groupby('genome'):
            high_para = genome_df[genome_df.Paralogy >= 4]['count'].sum()
            counts = dict(zip(genome_df['Paralogy'], genome_df['count']))
            low_para = [counts.get(level, 0) for level in (1, 2, 3)]
            rows.append([biotype, genome] + low_para + [high_para])
    df = pd.DataFrame(rows, columns=['biotype', 'genome', '1', '2', '3', u'\u2265 4'])
    sum_df = df.groupby('genome', sort=False).aggregate(sum).T

    if len(df.columns) <= 5:
        plot_fn = generic_unstacked_barplot
    else:
        plot_fn = generic_stacked_barplot

    with para_tgt.open('w') as outf, PdfPages(outf) as pdf:
        plot_fn(sum_df, pdf, title_string, legend_labels, 'Number of transcripts', ordered_genomes, box_label)
        for biotype in biotypes:
            biotype_df = biotype_filter(df, biotype)
            if biotype_df is None:
                continue
            biotype_df = biotype_df.drop(['genome', 'biotype'], axis=1).T
            plot_fn(biotype_df, pdf, biotype_title_string.format(biotype), legend_labels,
                    'Number of transcripts', ordered_genomes, box_label)
plots.py 文件源码
项目:Comparative-Annotation-Toolkit
作者: ComparativeGenomicsToolkit
项目源码
文件源码
阅读 23
收藏 0
点赞 0
评论 0
def tm_gene_family_plot(tm_data, ordered_genomes, biotypes, gene_family_tgt):
    """transMap gene family collapse plots.

    If the collapse data cannot be loaded (ValueError), the target is
    created empty and nothing is plotted. Otherwise one page shows the
    totals, followed by one page per biotype for which biotype_filter
    returns data.
    """
    try:
        df = json_biotype_nested_counter_to_df(tm_data, 'Gene Family Collapse')
    except ValueError:  # no gene family collapse. probably the test set.
        with gene_family_tgt.open('w') as outf:
            pass
        return

    def collapse_page(pdf, data, heading):
        # one bar-plot page, one facet per genome
        g = sns.factorplot(y='count', col='genome', x='Gene Family Collapse', data=data, kind='bar',
                           col_order=ordered_genomes, col_wrap=4)
        g.fig.suptitle(heading)
        g.set_xlabels('Number of genes collapsed to one locus')
        g.set_ylabels('Number of genes')
        multipage_close(pdf)

    df['Gene Family Collapse'] = pd.to_numeric(df['Gene Family Collapse'])
    tot_df = df[['Gene Family Collapse', 'genome', 'count']]
    tot_df = tot_df.groupby(['genome', 'Gene Family Collapse']).aggregate(sum).reset_index()
    tot_df = tot_df.sort_values('Gene Family Collapse')

    with gene_family_tgt.open('w') as outf, PdfPages(outf) as pdf:
        collapse_page(pdf, tot_df, 'Number of genes collapsed during gene family collapse')
        for biotype in biotypes:
            biotype_df = biotype_filter(df, biotype)
            if biotype_df is not None:
                collapse_page(pdf, biotype_df.sort_values('Gene Family Collapse'),
                              'Number of genes collapsed during gene family collapse for {}'.format(biotype))
plots.py 文件源码
项目:Comparative-Annotation-Toolkit
作者: ComparativeGenomicsToolkit
项目源码
文件源码
阅读 23
收藏 0
点赞 0
评论 0
def tx_modes_plot(consensus_data, ordered_genomes, tx_mode_plot_tgt):
    """Plot how many transcripts came from each (combination of) transcript mode(s).

    Mode strings are normalised by removing 'aug' and splitting on ',';
    combinations not listed in ordered_groups are reported as 'Other'.
    With several genomes a stacked bar plot is drawn, otherwise a plain
    bar plot.
    """
    ordered_groups = ['transMap', 'transMap+TM', 'transMap+TMR', 'transMap+TM+TMR', 'TM', 'TMR', 'TM+TMR', 'CGP', 'PB',
                      'Other']
    # key: the set of modes, value: the display name; insertion order is the plotting order
    ordered_groups = OrderedDict([[frozenset(x.split('+')), x] for x in ordered_groups])

    def split_fn(s):
        return ordered_groups.get(frozenset(s['Transcript Modes'].replace('aug', '').split(',')), 'Other')

    modes_df = json_biotype_counter_to_df(consensus_data, 'Transcript Modes')
    df = modes_df.pivot(index='genome', columns='Transcript Modes').transpose().reset_index()
    df['Modes'] = df.apply(split_fn, axis=1)
    df = df[['Modes'] + ordered_genomes]
    # Build the membership set once, and use values() (available on both
    # Python 2 and 3) rather than the Python-2-only itervalues().
    present_modes = set(df['Modes'])
    ordered_values = [x for x in ordered_groups.values() if x in present_modes]
    with tx_mode_plot_tgt.open('w') as outf, PdfPages(outf) as pdf:
        title_string = 'Transcript modes in protein coding consensus gene set'
        ylabel = 'Number of transcripts'
        if len(ordered_genomes) > 1:
            df['Ordered Modes'] = pd.Categorical(df['Modes'], ordered_values, ordered=True)
            df = df.sort_values('Ordered Modes')
            df = df[['Ordered Modes'] + ordered_genomes].set_index('Ordered Modes')
            df = df.fillna(0)
            generic_stacked_barplot(df, pdf, title_string, df.index, ylabel, ordered_genomes, 'Transcript mode(s)',
                                    bbox_to_anchor=(1.25, 0.7))
        else:
            generic_barplot(pd.melt(df, id_vars='Modes'), pdf, 'Transcript mode(s)', ylabel, title_string, x='Modes',
                            y='value', order=ordered_values)
plots.py 文件源码
项目:Comparative-Annotation-Toolkit
作者: ComparativeGenomicsToolkit
项目源码
文件源码
阅读 21
收藏 0
点赞 0
评论 0
def split_genes_plot(tm_data, ordered_genomes, split_plot_tgt):
    """Bar plot of the 'Split Genes' counters.

    With several genomes the plot is faceted by category with genomes on
    the x axis; with a single genome the categories are on the x axis.
    """
    title = 'Split genes'
    ylabel = 'Number of transcripts or genes'
    with split_plot_tgt.open('w') as outf, PdfPages(outf) as pdf:
        df = json_biotype_counter_to_df(tm_data, 'Split Genes')
        df.columns = ['category', 'count', 'genome']
        if len(ordered_genomes) <= 1:
            generic_barplot(pdf=pdf, data=df, x='category', y='count', ylabel=ylabel,
                            title=title, xlabel='Category')
        else:
            generic_barplot(pdf=pdf, data=df, x='genome', y='count', col='category', xlabel='', col_wrap=2,
                            sharey=False, ylabel=ylabel, row_order=ordered_genomes,
                            title=title)
plots.py 文件源码
项目:Comparative-Annotation-Toolkit
作者: ComparativeGenomicsToolkit
项目源码
文件源码
阅读 30
收藏 0
点赞 0
评论 0
def pb_support_plot(consensus_data, ordered_genomes, pb_genomes, pb_support_tgt):
    """Bar plot of the 'IsoSeq Transcript Validation' counters per genome.

    pb_genomes is re-ordered to follow ordered_genomes before plotting.
    """
    with pb_support_tgt.open('w') as outf, PdfPages(outf) as pdf:
        # fix order: keep only the IsoSeq genomes, in the overall genome order
        ordered_pb_genomes = [genome for genome in ordered_genomes if genome in pb_genomes]
        df = json_biotype_counter_to_df(consensus_data, 'IsoSeq Transcript Validation')
        df.columns = ['IsoSeq Transcript Validation', 'Number of transcripts', 'genome']
        grid = sns.factorplot(data=df, x='genome', y='Number of transcripts', hue='IsoSeq Transcript Validation',
                              kind='bar', row_order=ordered_pb_genomes)
        grid.set_xticklabels(rotation=90)
        grid.fig.suptitle('Isoforms validated by at least one IsoSeq read')
        multipage_close(pdf, tight_layout=False)
plots.py 文件源码
项目:Comparative-Annotation-Toolkit
作者: ComparativeGenomicsToolkit
项目源码
文件源码
阅读 25
收藏 0
点赞 0
评论 0
def completeness_plot(consensus_data, ordered_genomes, biotypes, completeness_plot_tgt, gene_biotype_map,
                      transcript_biotype_map):
    """Bar plots of how many genes/transcripts are present per genome.

    The first page covers everything; one further page is drawn per biotype
    for which biotype_filter returns data. On every page the y axis of the
    gene and transcript panels is capped at the number of entries in the
    respective biotype map (restricted to the biotype on biotype pages),
    and the top spine is drawn as a dashed red line.
    """
    def adjust_plot(g, gene_count, tx_count):
        # first-row axes are paired with the gene and transcript totals in that order
        for ax, c in zip(g.axes[0], [gene_count, tx_count]):
            ax.set_ylim(0, c)
            ax.spines['top'].set_edgecolor('#e74c3c')
            ax.spines['top'].set_linewidth(2)
            ax.spines['top'].set_visible(True)
            ax.spines['top'].set_linestyle('dashed')

    def count_biotype(biotype_map, biotype):
        # number of entries of the map whose value is `biotype`;
        # values() works on both Python 2 and 3, unlike iteritems()
        return sum(1 for b in biotype_map.values() if b == biotype)

    df = json_grouped_biotype_nested_counter_to_df(consensus_data, 'Completeness')
    with completeness_plot_tgt.open('w') as outf, PdfPages(outf) as pdf:
        tot_df = df.groupby(by=['genome', 'category']).aggregate(np.sum).reset_index()
        tot_df = sort_long_df(tot_df, ordered_genomes)
        title = 'Number of comparative genes/transcripts present'
        g = generic_barplot(pdf=pdf, data=tot_df, x='genome', y='count', col='category', xlabel='',
                            sharey=False, ylabel='Number of genes/transcripts', title=title,
                            col_order=['Gene', 'Transcript'], close=False, palette=choose_palette(ordered_genomes))
        adjust_plot(g, len(gene_biotype_map), len(transcript_biotype_map))
        multipage_close(pdf, tight_layout=False)
        for biotype in biotypes:
            biotype_df = biotype_filter(df, biotype)
            if biotype_df is not None:
                biotype_df = sort_long_df(biotype_df, ordered_genomes)
                gene_biotype_count = count_biotype(gene_biotype_map, biotype)
                tx_biotype_count = count_biotype(transcript_biotype_map, biotype)
                title = 'Number of comparative genes/transcripts present for biotype {}'.format(biotype)
                g = generic_barplot(pdf=pdf, data=biotype_df, x='genome', y='count', col='category', xlabel='',
                                    sharey=False, ylabel='Number of genes/transcripts',
                                    title=title, col_order=['Gene', 'Transcript'], close=False,
                                    palette=choose_palette(ordered_genomes))
                adjust_plot(g, gene_biotype_count, tx_biotype_count)
                multipage_close(pdf, tight_layout=False)
plots.py 文件源码
项目:Comparative-Annotation-Toolkit
作者: ComparativeGenomicsToolkit
项目源码
文件源码
阅读 26
收藏 0
点赞 0
评论 0
def indel_plot(consensus_data, ordered_genomes, indel_plot_tgt):
    """Bar plot of coding indel percentages, transMap versus consensus, per genome."""
    def indel_frame(key, set_name):
        # one row per genome (in ordered_genomes order), tagged with its transcript set
        frame = pd.concat([pd.DataFrame.from_dict(consensus_data[genome][key], orient='index').T
                           for genome in ordered_genomes])
        frame['genome'] = ordered_genomes
        frame['transcript set'] = [set_name] * len(frame)
        return frame

    with indel_plot_tgt.open('w') as outf, PdfPages(outf) as pdf:
        tm_df = indel_frame('transMap Indels', 'transMap')
        consensus_df = indel_frame('Consensus Indels', 'Consensus')
        combined = pd.concat([consensus_df, tm_df])
        long_df = pd.melt(combined, id_vars=['genome', 'transcript set'],
                          value_vars=['CodingDeletion', 'CodingInsertion', 'CodingMult3Indel'])
        long_df.columns = ['Genome', 'Transcript set', 'Type', 'Percent of transcripts']
        grid = sns.factorplot(data=long_df, x='Genome', y='Percent of transcripts', col='Transcript set',
                              hue='Type', kind='bar', row_order=ordered_genomes,
                              col_order=['transMap', 'Consensus'])
        grid.set_xticklabels(rotation=90)
        grid.fig.subplots_adjust(top=.8)
        grid.fig.suptitle('Coding indels')
        multipage_close(pdf, tight_layout=False)
###
# shared plotting functions
###
def plot_to_pdf(Y_valid, y_predict_probs, filename):
    """
    Write the predicted-probability plot and the precision/recall plot
    into '<filename>.pdf'. filename is also passed to both plotting
    helpers as their second-to-last positional argument.
    """
    pdf_path = filename + '.pdf'
    with PdfPages(pdf_path) as pdf:
        plot_predict_proba(y_predict_probs, filename, pdf=pdf)
        true_labels = Y_valid.values.ravel()
        plot_precision_recall_n(true_labels, y_predict_probs, filename, pdf=pdf)
def plot(self, path):
    """
    Run the search and write one PDF page per parameter combination.

    For every (V, e) pair from self.V and self.e a page is written: the
    plot of V annotated with the score and the parameter values, or a
    text-only "No Result" page when V is None. Warnings raised while
    plotting are suppressed.

    :param path: location of the PDF file to write.
    :return: None
    """
    self.run()
    with warnings.catch_warnings():
        warnings.simplefilter('ignore')
        with PdfPages(path) as pdf:
            for i, (V, e) in enumerate(zip(self.V, self.e)):
                params = '\n'.join(['%s: %s' % (n, str(v)) for n,v in zip(self.names, self.combinations[i]) ])
                if V is not None:
                    V.plot()
                    plt.subplots_adjust(right=0.7)
                    plt.figtext(0.73, 0.73, '%s: %.2f' % (self.eval, e), fontsize=14)
                    plt.figtext(0.73, 0.45, str(V), fontsize=10)
                    plt.figtext(0.73, 0.29, params, fontsize=10)
                    pdf.savefig(plt.gcf())
                else:
                    # plt.subplots (plural) creates the fresh figure for this
                    # page; plt.subplot(1, 1) is not a valid call.
                    plt.subplots(1, 1)
                    # NOTE(review): x=1 starts the text at the right edge of the
                    # figure; 0.1 (as used below) may have been intended - confirm.
                    plt.figtext(1, 0.73, 'No Result for combination: ', fontsize=14)
                    plt.figtext(0.1, 0.45, params, fontsize=10)
                    pdf.savefig()
                if self.verbose:
                    sys.stdout.write('%d/%d plots drawn.\n' % (i+1, self.n))
def plot_save(path, figs=None, dpi=180, tight_plot=False, clear_all=True, log=True):
    """
    Save figures into a multi-page PDF; failures are reported on stderr
    instead of being raised.

    Parameters
    ----------
    path: str
        output PDF; an existing regular file at this path is removed first
    figs: iterable of figures or None
        figures to save; if None, all figures currently known to matplotlib
    dpi: int
        accepted for compatibility; not used by the current implementation
    tight_plot: bool
        if True, call tight_layout on the current figure before saving
    clear_all: bool
        if True, remove all saved figures from current figure list
        in matplotlib
    log: bool
        if True, write a confirmation line to stderr
    """
    try:
        from matplotlib.backends.backend_pdf import PdfPages
        import matplotlib.pyplot as plt
        if tight_plot:
            plt.tight_layout()
        if os.path.isfile(path):
            os.remove(path)
        if figs is None:
            figs = [plt.figure(n) for n in plt.get_fignums()]
        # the context manager closes the PDF even if one savefig call fails
        with PdfPages(path) as pp:
            for fig in figs:
                fig.savefig(pp, format='pdf', bbox_inches="tight")
        if log:
            sys.stderr.write('Saved pdf figures to:%s \n' % str(path))
        if clear_all:
            plt.close('all')
    except Exception as e:
        # deliberate best effort: plotting problems must not abort the caller
        sys.stderr.write('Cannot save figures to pdf, error:%s \n' % str(e))
def __init__(self, pdf):
    """Set up plotting utilities on top of matplotlib that write to a PDF.

    The output is opened immediately through the PDF backend and kept on
    the instance as ``pages``.

    :param self: object.
    :param pdf: Output pdf.
    """
    self.pdf, self.plt = pdf, plt
    self.pages = PdfPages(pdf)