def predictions_vs_actual_regression(model_results, model_name, size=6, bins=None,
                                     gridsize=30, outlier_ratio=None, **kwargs):
    """Draw a hexbin joint plot of predictions against the actual target.

    Parameters
    ----------
    model_results : object
        Must expose ``holdout_data`` (the data handed to ``sns.jointplot``)
        and ``target`` (name of the actual-value column).
    model_name : str
        Inserted into the figure title.
    size : number, optional
        Forwarded to ``sns.jointplot`` as ``size``.
    bins : optional
        Forwarded to ``sns.jointplot`` as ``bins``. The color bar is labelled
        ``'count'`` for ``None`` and ``'log_10(count)'`` for ``'log'``; any
        other value leaves the color bar unlabelled.
    gridsize : int, optional
        Forwarded to ``sns.jointplot`` as ``gridsize``.
    outlier_ratio : optional
        When not ``None``, ``utils.remove_outlier_rows`` is applied first on
        the ``'prediction'`` column and then on the target column.
    **kwargs
        Passed through to ``sns.jointplot``.

    Returns
    -------
    The grid object returned by ``sns.jointplot``.
    """
    holdout = model_results.holdout_data
    target = model_results.target
    if outlier_ratio is not None:
        holdout = utils.remove_outlier_rows(holdout, 'prediction', outlier_ratio)
        holdout = utils.remove_outlier_rows(holdout, target, outlier_ratio)

    sns.set(style="white", color_codes=True)
    marginal_kws = dict(hist_kws=dict(edgecolor='black'))
    grid = sns.jointplot('prediction', target, holdout, 'hexbin', gridsize=gridsize,
                         size=size, bins=bins, space=0, marginal_kws=marginal_kws, **kwargs)

    # The title must be attached to the grid's own figure, after the grid
    # exists; calling plt.suptitle() beforehand titles whatever figure
    # happened to be current at that time instead.
    grid.fig.suptitle('{0}: Predictions vs Actual'.format(model_name), fontsize=14)

    # Shrink the plot area so the color bar added below stays visible.
    grid.fig.subplots_adjust(left=0.1, right=0.9, top=0.9, bottom=0.1)
    cax = grid.fig.add_axes([.95, .18, .04, .5])  # x, y, width, height

    # Use pyplot directly (as the rest of this function does) rather than the
    # ``sns.plt`` alias, which current seaborn releases no longer provide.
    color_bar = plt.colorbar(cax=cax)
    if bins is None:
        color_bar.set_label('count')
    elif bins == 'log':
        color_bar.set_label('log_10(count)')
    return grid
python类jointplot()的实例源码
async def graph_alternative(self, ctx, *, data : str):
    '''WIP: draw a seaborn joint plot from ``data`` and post it to the channel.

    ``data`` is evaluated into a mapping of keyword arguments for
    ``seaborn.jointplot``; the resulting figure is saved to a fixed temporary
    path and sent to the channel the invoking message came from.

    Declared ``async`` because the body awaits ``self.bot.send_file``.
    '''
    filename = "data/temp/graph_alternative.png"
    # SECURITY(review): eval() executes arbitrary code. ``data`` appears to be
    # user-supplied text, so this should be replaced by a safe parser before
    # the command leaves WIP status.
    plot_kwargs = eval(data)
    # Save to ``filename`` (the original referenced an undefined ``name``).
    seaborn.jointplot(**plot_kwargs).savefig(filename)
    await self.bot.send_file(destination = ctx.message.channel, fp = filename, content = ctx.message.author.display_name + ':')
def _jointplot(self, first: RunData, second: RunData, property: str, size: int, filename: str = None,
               show_ticks: bool = True):
    """Save a joint plot comparing one property of two runs.

    Both value sequences are truncated to the length of the shorter one and
    plotted against each other with shared axis limits of
    ``(0, max of all plotted values)``. The plot kind is read from
    ``self.misc["pair_kind"]`` and the statistic shown comes from
    ``self.stats_helper.tester.test``.

    :param first: run whose values go on the x axis
    :param second: run whose values go on the y axis
    :param property: key used to look the values up in both runs
    :param size: passed to ``self._set_fig_size`` and to ``sns.jointplot``
    :param filename: output file; a new figure filename is generated when falsy
    :param show_ticks: when False, tick labels of the joint axes are blanked
    :return: the filename that was used; it is returned even if plotting
             failed (the failure is logged as a warning)
    """
    import matplotlib.pyplot as plt
    import seaborn as sns

    filename = filename or self._get_new_figure_filename()

    # Only compare as many samples as both runs provide.
    length = min(len(first[property]), len(second[property]))
    first_prop = first[property][0:length]
    second_prop = second[property][0:length]
    lim = (0, max(max(first_prop), max(second_prop)))

    self._set_fig_size(size)
    x1 = pd.Series(first_prop, name="{descr}: {prop}".format(descr=first.description(), prop=property))
    x2 = pd.Series(second_prop, name="{descr}: {prop}".format(descr=second.description(), prop=property))
    # NOTE(review): this targets pyplot's current axes; the grid created below
    # receives its limits through the xlim/ylim arguments instead.
    plt.xlim(lim)

    try:
        g = sns.jointplot(x1, x2, kind=self.misc["pair_kind"], size=size, space=0,
                          stat_func=self.stats_helper.tester.test, xlim=lim, ylim=lim)
        if not show_ticks:
            g.ax_joint.set_xticklabels([])
            g.ax_joint.set_yticklabels([])
        g.savefig(filename)
    except Exception as ex:
        # Plotting stays best effort, but KeyboardInterrupt/SystemExit are no
        # longer swallowed (the original caught BaseException).
        logging.warning(ex)
    finally:
        # Close the figure on failure too, so aborted plots do not pile up.
        plt.close()
    return filename
def stock():
    """Fetch price histories, print return statistics and show several plots.

    For every entry of the hard-coded code table a history is fetched through
    ``tsh.get_hist_data`` and stored as a module-level global named after the
    entry's key. The ``"close"`` columns are joined, turned into percentage
    changes, and then printed and plotted (joint plots, a pair plot and a
    mean-vs-standard-deviation scatter plot with labelled points).
    """
    # Short name -> code handed to tsh.get_hist_data.
    stock_list = {"zsyh":"600036","jsyh":"601939","szzs":"000001","pfyh":"600000","msyh":"600061"}
    for short_name, code in stock_list.items():
        # The history is published as a module global under its short name.
        globals()[short_name] = tsh.get_hist_data(code,start="2015-01-01",end="2016-04-16")
    stock_list2 = stock_list.keys()

    # Collect the closing-price column of every fetched history.
    closes = []
    for st in stock_list2:
        closes.append(globals()[st]["close"])

    # Keep only the index values present in all histories.
    df_close = pd.concat(closes,axis=1,join='inner')
    df_close.columns = stock_list2
    # Sort by index in ascending order before computing changes.
    df_close.sort_index(ascending=True,inplace=True)

    # Percentage change between consecutive rows.
    pc_ret = df_close.pct_change()
    print(pc_ret)
    make_end_line()
    print(pc_ret.mean())
    make_end_line()

    # Pairwise joint plots of the percentage changes.
    joint_specs = (
        ("zsyh", "jsyh", "hex"),
        ("zsyh", "jsyh", "scatter"),
        ("zsyh", "szzs", "scatter"),
    )
    for x_name, y_name, plot_kind in joint_specs:
        joint_grid = sns.jointplot(x_name, y_name, pc_ret, kind=plot_kind)
        plt.show(joint_grid)

    # Pair plot over four of the series, rows with missing values dropped.
    pair_grid = sns.pairplot(pc_ret[["jsyh","zsyh","pfyh","msyh"]].dropna())
    plt.show(pair_grid)

    print(pc_ret.std())
    make_end_line()

    rets = pc_ret.dropna()
    mean_ret = rets.mean()
    print(mean_ret)
    make_end_line()
    risk = rets.std()

    area = np.pi *20  # computed as in the original; not used below

    # Mean change on the x axis against its standard deviation on the y axis.
    plt.scatter(mean_ret, risk)
    plt.xlabel("Expected Return")
    plt.ylabel("Risk")
    arrow_style = dict(arrowstyle = "-",connectionstyle = "arc3,rad=-0.3")
    for label, x, y in zip(rets.columns, mean_ret, risk):
        # Label every point with its column name, offset from the marker.
        plt.annotate(
            label,
            xy = (x,y),xytext = (50,50),
            textcoords = "offset points",ha = "right",va = "bottom",
            arrowprops = arrow_style)
    plt.show()
def plot_correlation(self, on, x_col=None, plot_type="jointplot", stat_func=pearsonr, show_stat_func=True, plot_kwargs=None, **kwargs):
    """Plot the correlation between two variables.

    Parameters
    ----------
    on : list or dict of functions or strings
        See `cohort.load.as_dataframe`
    x_col : str, optional
        If `on` is a dict, this guarantees we have the expected ordering.
    plot_type : str, optional
        Specify "jointplot", "regplot", "boxplot", or "barplot".
    stat_func : function, optional.
        Specify which function to use for the statistical test.
    show_stat_func : bool, optional
        Whether or not to show the stat_func result in the plot itself.
    plot_kwargs : dict, optional
        kwargs to pass through to plotting functions. Defaults to no extra
        arguments.
    **kwargs
        Passed through to `self.as_dataframe`.

    Returns
    -------
    CorrelationResults
        Holds the coefficient and p-value returned by `stat_func`, the
        `stat_func` itself, the two series that were compared and the plot.

    Raises
    ------
    ValueError
        If `plot_type` is not one of the supported values, or if `on` does
        not resolve to exactly two columns.
    """
    if plot_type not in ["boxplot", "barplot", "jointplot", "regplot"]:
        raise ValueError("Invalid plot_type %s" % plot_type)
    # A None default avoids sharing one dict object between calls.
    if plot_kwargs is None:
        plot_kwargs = {}

    plot_cols, df = self.as_dataframe(on, return_cols=True, **kwargs)
    if len(plot_cols) != 2:
        raise ValueError("Must be comparing two columns, but there are %d columns" % len(plot_cols))

    # Only keep rows where both compared columns have a value.
    for plot_col in plot_cols:
        df = filter_not_null(df, plot_col)

    if x_col is None:
        x_col = plot_cols[0]
        y_col = plot_cols[1]
    else:
        # The y axis gets whichever of the two columns is not the x column.
        y_col = plot_cols[1] if x_col == plot_cols[0] else plot_cols[0]

    series_x = df[x_col]
    series_y = df[y_col]
    coeff, p_value = stat_func(series_x, series_y)

    if plot_type == "jointplot":
        plot = sb.jointplot(data=df, x=x_col, y=y_col,
                            stat_func=stat_func if show_stat_func else None,
                            **plot_kwargs)
    elif plot_type == "regplot":
        plot = sb.regplot(data=df, x=x_col, y=y_col,
                          **plot_kwargs)
    elif plot_type == "boxplot":
        plot = stripboxplot(data=df, x=x_col, y=y_col, **plot_kwargs)
    else:
        plot = sb.barplot(data=df, x=x_col, y=y_col, **plot_kwargs)

    return CorrelationResults(coeff=coeff, p_value=p_value, stat_func=stat_func,
                              series_x=series_x, series_y=series_y, plot=plot)
def main():
    """Command-line entry point: run (or profile) the analysis.

    Without ``--profile`` the result of ``run`` is written to
    ``tmp/cp-analysis-<sorted option_value pairs>.csv`` and a regression joint
    plot of its ``ratio`` and ``speedup`` columns is saved next to it as a
    ``.png``. With ``--profile`` only ``profile_run`` is executed.
    """
    from argparse import ArgumentParser
    parser = ArgumentParser()
    parser.add_argument('--minlength', type=int, default=5)
    parser.add_argument('--maxlength', type=int, default=30)
    parser.add_argument('--examples', type=int, required=True)
    parser.add_argument('--seed', type=int, default=None)
    parser.add_argument('--profile', action='store_true')
    parser.add_argument('--grammar', choices=('medium','big'), default='medium')
    parser.add_argument('--aggressive', type=float, default=0,
                        help='Pruning rate (zero=no pruning, one=lots of pruning).')
    args = parser.parse_args()

    # Both entry points take the same options.
    run_options = dict(examples = args.examples,
                       grammar = args.grammar,
                       maxlength = args.maxlength,
                       minlength = args.minlength,
                       aggressive = args.aggressive,
                       seed = args.seed)

    if args.profile:
        profile_run(**run_options)
    else:
        d = run(**run_options)
        # Encode every command-line option in the output file name.
        filename_base = 'tmp/cp-analysis-' + '-'.join('%s_%s' % (k,v) for k,v in sorted(vars(args).items()))
        d.to_csv('%s.csv' % filename_base)
        # Separate name for the plot grid; the original reused the parser's
        # variable for it.
        grid = sns.jointplot('ratio', 'speedup', d, kind='reg')
        grid.savefig('%s.png' % filename_base)
        # Single-argument parenthesised print is valid on Python 2 and 3.
        print('[info] wrote %s.csv' % filename_base)

    # NOTE(review): the source's indentation was lost; these closing steps are
    # assumed to run for both branches -- confirm against the original file.
    print('== DONE ==')
    pl.ioff()
    pl.show()
def sb_jointplot(self, req, debug=False):
    """Render a seaborn joint plot described by ``req`` and save it to a file.

    Keys read from ``req``:

    * ``"ImgFile"`` (required): path the figure is saved to.
    * ``"SourceDF"`` (required): data handed to ``sns.jointplot``.
    * ``"DSName"`` (required): looked up but not otherwise used here.
    * ``"ShowPlot"`` (required): ``plt.show()`` is called when it equals True.
    * ``"X"`` / ``"Y"`` (optional): x and y names, default ``"measurement"``
      and ``"value"``.
    * ``"Width"`` / ``"Height"`` (optional): figure size, default 15.0 each.
    * ``"Kind"`` (optional): joint plot kind, default ``"reg"``.

    :param req: request mapping as described above (assumed to be a dict)
    :param debug: when true, the target file name is logged via ``self.lg``
    :return: list containing the saved image file name
    """
    # Only the modules actually used are imported. The original also imported
    # several unused ones, among them ``matplotlib.finance``, which is gone
    # from current matplotlib and made this function fail on import.
    import seaborn as sns
    import matplotlib.pyplot as plt

    image_list = []
    image_filename = req["ImgFile"]
    source_df = req["SourceDF"]
    ds_name = req["DSName"]  # unused below; kept so a missing key still raises KeyError

    sns.set_style("whitegrid", {'axes.grid' : True})
    # NOTE(review): the returned palette is discarded, so this call does not
    # change the active palette; sns.set_palette may have been intended.
    sns.color_palette("Set1", n_colors=8, desat=.5)

    # Parse the optional inputs, falling back to the defaults.
    cur_xlabel = str(req.get("X", "measurement"))
    cur_ylabel = str(req.get("Y", "value"))
    cur_width = float(req.get("Width", 15.0))
    cur_height = float(req.get("Height", 15.0))
    cur_kind = str(req.get("Kind", "reg"))
    # end of parsing inputs

    # Add custom plots here
    grid = sns.jointplot(cur_xlabel, cur_ylabel, kind=cur_kind, data=source_df, annot_kws=dict(stat="r"))

    if debug:
        self.lg("Saving File(" + str(image_filename) + ")", 6)

    grid.fig.set_figwidth(cur_width)
    grid.fig.set_figheight(cur_height)

    ax = grid.ax_joint
    self.pd_add_footnote(ax.figure)
    ax.figure.savefig(image_filename)
    image_list.append(image_filename)

    # Deliberately an equality test against True, exactly as before.
    if req["ShowPlot"] == True:
        plt.show()

    return image_list
# end of sb_jointplot
def testcase1():
    """Draw a regression joint plot of tip against total bill for the tips data."""
    tips = sns.load_dataset('tips')
    # The dataset's column is named 'tip'; there is no 'tips' column.
    # Keyword arguments keep the call independent of jointplot's positional
    # parameter order.
    sns.jointplot(x='total_bill', y='tip', data=tips, kind='reg')
def sb_jointplot(self, req, debug=False):
    """Render a seaborn joint plot described by ``req`` and save it to a file.

    Keys read from ``req``:

    * ``"ImgFile"`` (required): path the figure is saved to.
    * ``"SourceDF"`` (required): data handed to ``sns.jointplot``.
    * ``"DSName"`` (required): looked up but not otherwise used here.
    * ``"ShowPlot"`` (required): ``plt.show()`` is called when it equals True.
    * ``"X"`` / ``"Y"`` (optional): x and y names, default ``"measurement"``
      and ``"value"``.
    * ``"Width"`` / ``"Height"`` (optional): figure size, default 15.0 each.
    * ``"Kind"`` (optional): joint plot kind, default ``"reg"``.

    :param req: request mapping as described above (assumed to be a dict)
    :param debug: when true, the target file name is logged via ``self.lg``
    :return: list containing the saved image file name
    """
    # Only the modules actually used are imported. The original also imported
    # several unused ones, among them ``matplotlib.finance``, which is gone
    # from current matplotlib and made this function fail on import.
    import seaborn as sns
    import matplotlib.pyplot as plt

    image_list = []
    image_filename = req["ImgFile"]
    source_df = req["SourceDF"]
    ds_name = req["DSName"]  # unused below; kept so a missing key still raises KeyError

    sns.set_style("whitegrid", {'axes.grid' : True})
    # NOTE(review): the returned palette is discarded, so this call does not
    # change the active palette; sns.set_palette may have been intended.
    sns.color_palette("Set1", n_colors=8, desat=.5)

    # Parse the optional inputs, falling back to the defaults.
    cur_xlabel = str(req.get("X", "measurement"))
    cur_ylabel = str(req.get("Y", "value"))
    cur_width = float(req.get("Width", 15.0))
    cur_height = float(req.get("Height", 15.0))
    cur_kind = str(req.get("Kind", "reg"))
    # end of parsing inputs

    # Add custom plots here
    grid = sns.jointplot(cur_xlabel, cur_ylabel, kind=cur_kind, data=source_df, annot_kws=dict(stat="r"))

    if debug:
        self.lg("Saving File(" + str(image_filename) + ")", 6)

    grid.fig.set_figwidth(cur_width)
    grid.fig.set_figheight(cur_height)

    ax = grid.ax_joint
    self.pd_add_footnote(ax.figure)
    ax.figure.savefig(image_filename)
    image_list.append(image_filename)

    # Deliberately an equality test against True, exactly as before.
    if req["ShowPlot"] == True:
        plt.show()

    return image_list
# end of sb_jointplot
def scatterCorr(arrayA, arrayB, threshold, outPath):
    """
    Compute the Spearman correlation of two samples and, when it is strong
    enough, save a KDE joint plot of them.

    Interpretation of strength of correlation (absolute value of rho)
    very weak: < 0.15
    weak: 0.15-0.25
    moderate: 0.25-0.40
    strong: 0.40-0.75
    very strong: > 0.75

    Parameters:
        arrayA: values for the x axis
        arrayB: values for the y axis
        threshold: a plot is written only if rho >= threshold or
                   rho <= -threshold
        outPath: prefix of the output file; the plot is saved as
                 '<outPath>_<rho>_correlation.pdf'

    Returns:
        (coefficient, pvalue): rho rounded to three decimals and the p-value,
        both as floats. The p-value is also printed.
    """
    corr = stats.spearmanr(arrayA, arrayB)
    coefficient = float(format(corr[0], '.3f'))
    pvalue = float(corr[1])
    # Same text as the Python 2 statement `print "pvalue: ", pvalue`, written
    # so that it is valid on Python 2 and 3.
    print("%s %s" % ("pvalue: ", pvalue))

    ## Make scatterplot if rho >= threshold or <= -threshold
    if abs(coefficient) >= threshold:
        # jointplot builds its own figure, so no figure is created up front
        # (the original opened an extra one that was never used or closed).
        plot = sns.jointplot(x=arrayA, y=arrayB, kind="kde", space=0, xlim=(0,30), gridsize=50, dropna=True, cmap="Blues", stat_func=stats.spearmanr)

        # Label the joint axes through the grid; plt.xlabel/plt.ylabel act on
        # pyplot's current axes, which need not be the joint axes.
        plot.set_axis_labels('# L1', 'Replication time', fontsize=12)

        ## Save figure
        fileName = outPath + '_' + str(coefficient) + '_correlation.pdf'
        plot.fig.savefig(fileName)

    return coefficient, pvalue
#### MAIN ####
## Import modules ##