def visualize_results(self):
# Visualize the logistic regression curve using seaborn (logistic=True fits the curve via statsmodels)
sns.set(style="darkgrid", font_scale=1.3)
sns.regplot(x="pageviews_cumsum",
y="is_conversion",
data=self.df,
logistic=True,
n_boot=500,
y_jitter=.01,
scatter_kws={"s": 60})
# sns.plt was never a public seaborn API; use matplotlib.pyplot (plt) directly
plt.title('Logistic Regression Curve')
plt.ylabel('Conversion probability')
plt.xlabel('Cumulative sum of pageviews')
plt.subplots_adjust(right=0.93, top=0.90, left=0.10, bottom=0.10)
plt.show()
Source file: business_case_solver.py (project: themarketingtechnologist, author: thomhopmans)
def show_scatter(df, xlim=(-5, 105), ylim=(-5, 105), color="black", marker="o", reg_fit=False):
"""Create a scatter plot of the data
Args:
df (pd.DataFrame): The data set to plot
xlim ((float, float)): The x-axis limits
ylim ((float, float)): The y-axis limits
color (str): The color of the scatter points
marker (str): The marker style for the scatter points
reg_fit (bool): Whether to plot a linear regression on the graph
"""
sns.regplot(
x="x",
y="y",
data=df,
ci=None,
fit_reg=reg_fit,
marker=marker,
scatter_kws={"s": 50, "alpha": 0.7, "color": color},
line_kws={"linewidth": 4, "color": "red"})
plt.xlim(xlim)
plt.ylim(ylim)
plt.tight_layout()
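A minimal usage sketch (illustrative only, not part of the original file); it assumes pandas is imported as pd and matplotlib.pyplot as plt, and any DataFrame with "x" and "y" columns will do:
import pandas as pd
# Hypothetical demo data in the 0-100 range that the default axis limits expect
demo_df = pd.DataFrame({"x": [10, 25, 40, 60, 85], "y": [12, 30, 38, 65, 80]})
show_scatter(demo_df, color="steelblue", reg_fit=True)
plt.show()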
Source file: regression_modeling.py (project: -Python-Analysis_of_wine_quality, author: ekolik)
def basic_linear(wine_set):
scat0 = seaborn.regplot(x="volatile_acidity", y="quality", fit_reg=True, data=wine_set)
plt.xlabel("Amount of volatile acidity in wine")
plt.ylabel("Quality level of wine (0-10 scale)")
plt.title("Association between the amount of volatile acidity in wine and the quality of wine")
plt.show()
# ----------- centering the explanatory variable by subtracting the mean
f_acidity_mean = wine_set["volatile_acidity"].mean()
print("mean of the volatile acidity variable = ", f_acidity_mean)
wine_set["volatile_acidity"] = wine_set["volatile_acidity"] - f_acidity_mean
print("mean of the volatile acidity variable after normalization = ", wine_set["volatile_acidity"].mean())
print ("\nOLS regression model for the association between the amount of volatile acidity in wine and the quality of wine:")
model1 = smf.ols(formula="quality ~ volatile_acidity", data=wine_set)
results1 = model1.fit()
print(results1.summary())
# call(basic_linear)
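Because volatile_acidity is centered before fitting, the intercept is directly interpretable as the predicted quality of a wine with average volatile acidity. A small illustrative sketch (not in the original script) of reading those values off the fitted statsmodels results:
# results1.params["Intercept"]          -> predicted quality at the mean volatile acidity
# results1.params["volatile_acidity"]   -> expected change in quality per unit increase in volatile acidity
# results1.pvalues["volatile_acidity"]  -> p-value for that association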
# #___________________________________ Multiple Regression___________________________________________
def plot_eval(self, eval_dict, labels, path_extension=""):
"""
Plot the loss values in an overall plot and a zoomed plot of the last quarter of epochs.
:param eval_dict: mapping of epoch -> list of metric values recorded at that epoch.
:param labels: legend label for each metric in eval_dict.
:param path_extension: suffix appended to the saved file name so plots can be saved incrementally.
"""
def plot(x, y, fit, label):
sns.regplot(x=np.array(x), y=np.array(y), fit_reg=fit, label=label, scatter_kws={"s": 5})
plt.clf()
plt.subplot(211)
# dict views are not indexable or sliceable in Python 3, so materialise them first
epochs = list(eval_dict.keys())
x = np.array(list(eval_dict.values()))
idx = x.shape[1]
for i in range(idx):
plot(epochs, x[:, i], False, labels[i])
plt.legend()
plt.subplot(212)
last_quarter = int(len(x) * 0.25)
for i in range(idx):
plot(epochs[-last_quarter:], x[-last_quarter:][:, i], True, labels[i])
plt.xlabel('Epochs')
plt.savefig(paths.get_plot_evaluation_path_for_model(self.model.get_root_path(), path_extension+".png"))
Source file: business_case_solver_without_classes.py (project: themarketingtechnologist, author: thomhopmans)
def visualize_results(df):
# Visualize the logistic regression curve using seaborn (logistic=True fits the curve via statsmodels)
sns.set(style="darkgrid", font_scale=1.3)
sns.regplot(x="pageviews_cumsum",
y="is_conversion",
data=df,
logistic=True,
n_boot=500,
y_jitter=.01,
scatter_kws={"s": 60})
plt.title('Logistic Regression Curve')
plt.ylabel('Conversion probability')
plt.xlabel('Cumulative sum of pageviews')
plt.subplots_adjust(right=0.93, top=0.90, left=0.10, bottom=0.10)
plt.show()
# Run the final program
def show_scatter_and_results(df):
"""Creates a plot which shows both the plot and the statistical summary
Args:
df (pd.DataFrame): The data set to plot
labels (List[str]): The labels to use for
"""
plt.figure(figsize=(12, 5))
sns.regplot("x", y="y", data=df, ci=None, fit_reg=False,
scatter_kws={"s": 50, "alpha": 0.7, "color": "black"})
plt.xlim(-5, 105)
plt.ylim(-5, 105)
plt.tight_layout()
res = get_values(df)
fs = 30
y_off = -5
labels = ("X Mean", "Y Mean", "X SD", "Y SD", "Corr.")
max_label_length = max([len(l) for l in labels])
# If `max_label_length = 10`, this string will be "{:<10}: {:0.9f}", then we
# can pull the `.format` method for that string to reduce typing it
# repeatedly
formatter = '{{:<{pad}}}: {{:0.9f}}'.format(pad=max_label_length).format
corr_formatter = '{{:<{pad}}}: {{:+.9f}}'.format(pad=max_label_length).format
opts = dict(fontsize=fs, alpha=0.3)
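# The summary text is drawn twice: first sliced with [:-2] at alpha 0.3 (a faint,
# longer-precision version) and then sliced with [:-7] at full opacity, so only the
# first two decimals appear bold. For example, formatter("X Mean", 54.26327) gives
# "X Mean: 54.263270000"; [:-2] -> "X Mean: 54.2632700", [:-7] -> "X Mean: 54.26".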
plt.text(110, y_off + 80, formatter(labels[0], res[0])[:-2], **opts)
plt.text(110, y_off + 65, formatter(labels[1], res[1])[:-2], **opts)
plt.text(110, y_off + 50, formatter(labels[2], res[2])[:-2], **opts)
plt.text(110, y_off + 35, formatter(labels[3], res[3])[:-2], **opts)
plt.text(110, y_off + 20, corr_formatter(labels[4], res[4])[:-2], **opts)
opts['alpha'] = 1
plt.text(110, y_off + 80, formatter(labels[0], res[0])[:-7], **opts)
plt.text(110, y_off + 65, formatter(labels[1], res[1])[:-7], **opts)
plt.text(110, y_off + 50, formatter(labels[2], res[2])[:-7], **opts)
plt.text(110, y_off + 35, formatter(labels[3], res[3])[:-7], **opts)
plt.text(110, y_off + 20, corr_formatter(labels[4], res[4])[:-7], **opts)
plt.tight_layout(rect=[0, 0, 0.57, 1])
def pearson(wine_set):
scat1 = seaborn.regplot(x="density", y="residual_sugar", fit_reg=True, data=wine_set)
plt.xlabel("Density of wine")
plt.ylabel("Residual sugar in wine, gram")
plt.title("Association between wine's density and residual sugar")
plt.show()
print(scipy.stats.pearsonr(wine_set['density'], wine_set["residual_sugar"]))
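# scipy.stats.pearsonr returns a (correlation coefficient, two-tailed p-value) pair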
# print('----------------Pearson Correlation------------------------')
# call(pearson)
# -----------------------------------------Exploring Statistical Interactions------------------
def explore(wine_set):
low = wine_set[wine_set['quality'] <= 5]
medium = wine_set[(wine_set['quality'] == 6) | (wine_set['quality'] == 7)]
high = wine_set[wine_set['quality'] > 7]
print('association between wine`s density and residual sugar for wines \nof `low` quality')
print(scipy.stats.pearsonr(low['density'], low["residual_sugar"]))
print('\nof `medium` quality')
print(scipy.stats.pearsonr(medium['density'], medium["residual_sugar"]))
print('\nof `high` quality')
print(scipy.stats.pearsonr(high['density'], high["residual_sugar"]))
scat0 = seaborn.regplot(x="density", y="residual_sugar", fit_reg=True, data=low)
plt.xlabel("Density of wine")
plt.ylabel("Residual sugar in wine, gram")
plt.title("Association between wine's density and residual sugar for wines of `low` quality")
plt.show()
scat0 = seaborn.regplot(x="density", y="residual_sugar", fit_reg=True, data=medium)
plt.xlabel("Density of wine")
plt.ylabel("Residual sugar in wine, gram")
plt.title("Association between wine's density and residual sugar for wines of `medium` quality")
plt.show()
scat0 = seaborn.regplot(x="density", y="residual_sugar", fit_reg=True, data=high)
plt.xlabel("Density of wine")
plt.ylabel("Residual sugar in wine, gram")
plt.title("Association between wine's density and residual sugar for wines of `high` quality")
plt.show()
def plot_angle_comparison(disc, whole, lgdtext=None, fname=None):
# if fname is not None:
# mpl.rc("savefig", dpi=300)
if lgdtext is None:
lgdtext = [u"Disc-Only Angle (deg)", 'Whole TRPV1 Angle (deg)']
sns.set(font_scale=3)
f, ax = plt.subplots(1, 3, figsize=(30, 10))
sns.regplot(whole["rlnAngleRot"], disc["rlnAngleRot"], fit_reg=False, scatter_kws={"s": 16}, ax=ax[0])
ax[0].set_xlim((-45, 45))
ax[0].set_ylim((-45, 45))
ax[0].set_xticks(np.arange(-45, 46, 15))
ax[0].set_yticks(np.arange(-45, 46, 15))
ax[0].xaxis.label.set_visible(False)
ax[0].set_ylabel(lgdtext[0])
ax[0].set_title(r"$\phi$ ( $Z$ )", y=1.01)
sns.regplot(whole["rlnAngleTilt"], disc["rlnAngleTilt"], fit_reg=False, scatter_kws={"s": 16}, ax=ax[1])
ax[1].set_xlim((0, 180))
ax[1].set_ylim((0, 180))
ax[1].set_xticks(np.arange(0, 181, 30))
ax[1].set_yticks(np.arange(0, 181, 30))
ax[1].xaxis.label.set_visible(False)
ax[1].yaxis.label.set_visible(False)
ax[1].set_title(r"$\theta$ ( $Y'$ )", y=1.01)  # raw string: otherwise \t in \theta is parsed as a tab character
sns.regplot(whole["rlnAnglePsi"], disc["rlnAnglePsi"], fit_reg=False, scatter_kws={"s": 16}, ax=ax[2])
ax[2].set_xlim((-180, 180))
ax[2].set_ylim((-180, 180))
ax[2].set_xticks(np.arange(-180, 181, 45))
ax[2].set_yticks(np.arange(-180, 181, 45))
ax[2].xaxis.label.set_visible(False)
ax[2].yaxis.label.set_visible(False)
ax[2].set_title(r"$\psi$ ( $Z''$ )", y=1.01)
f.text(0.5, -0.05, lgdtext[1], ha='center', fontsize=36)
f.tight_layout(pad=1., w_pad=-1.5, h_pad=0.5)
if fname is not None:
f.savefig(fname, dpi=300)
# mpl.rc("savefig", dpi=80)
return f, ax
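A hypothetical call sketch (column names follow the RELION star-file convention already used above):
# disc_df and whole_df are pandas DataFrames carrying rlnAngleRot, rlnAngleTilt and rlnAnglePsi columns
# fig, axes = plot_angle_comparison(disc_df, whole_df, fname="angle_comparison.png")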
def regression(data,x,y,xscale='linear',yscale='linear'):
sns.set_context("notebook", font_scale=.8, rc={"lines.linewidth": 0})
sns.set_style('white')
g = sns.regplot(x=x, y=y, data=data)
plt.tick_params(axis='both', which='major', pad=10)
g.set(xscale=xscale)
g.set(yscale=yscale)
sns.despine()
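A minimal usage sketch (hypothetical data and column names; assumes pandas as pd and matplotlib.pyplot as plt are already imported):
import pandas as pd
demo = pd.DataFrame({"followers": [10, 100, 1000, 10000], "retweets": [1, 12, 90, 800]})
regression(demo, x="followers", y="retweets", xscale='log', yscale='log')
plt.show()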
def plot_correlation(self, on, x_col=None, plot_type="jointplot", stat_func=pearsonr, show_stat_func=True, plot_kwargs={}, **kwargs):
"""Plot the correlation between two variables.
Parameters
----------
on : list or dict of functions or strings
See `cohort.load.as_dataframe`
x_col : str, optional
If `on` is a dict, this guarantees we have the expected ordering.
plot_type : str, optional
Specify "jointplot", "regplot", "boxplot", or "barplot".
stat_func : function, optional.
Specify which function to use for the statistical test.
show_stat_func : bool, optional
Whether or not to show the stat_func result in the plot itself.
plot_kwargs : dict, optional
kwargs to pass through to plotting functions.
"""
if plot_type not in ["boxplot", "barplot", "jointplot", "regplot"]:
raise ValueError("Invalid plot_type %s" % plot_type)
plot_cols, df = self.as_dataframe(on, return_cols=True, **kwargs)
if len(plot_cols) != 2:
raise ValueError("Must be comparing two columns, but there are %d columns" % len(plot_cols))
for plot_col in plot_cols:
df = filter_not_null(df, plot_col)
if x_col is None:
x_col = plot_cols[0]
y_col = plot_cols[1]
else:
if x_col == plot_cols[0]:
y_col = plot_cols[1]
else:
y_col = plot_cols[0]
series_x = df[x_col]
series_y = df[y_col]
coeff, p_value = stat_func(series_x, series_y)
if plot_type == "jointplot":
plot = sb.jointplot(data=df, x=x_col, y=y_col,
stat_func=stat_func if show_stat_func else None,
**plot_kwargs)
elif plot_type == "regplot":
plot = sb.regplot(data=df, x=x_col, y=y_col,
**plot_kwargs)
elif plot_type == "boxplot":
plot = stripboxplot(data=df, x=x_col, y=y_col, **plot_kwargs)
else:
plot = sb.barplot(data=df, x=x_col, y=y_col, **plot_kwargs)
return CorrelationResults(coeff=coeff, p_value=p_value, stat_func=stat_func,
series_x=series_x, series_y=series_y, plot=plot)
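A hypothetical call sketch, shown as comments because the `on` functions and the cohort object depend on the surrounding cohorts-style library (as_dataframe, filter_not_null and stripboxplot are assumed to come from it):
# results = cohort.plot_correlation(
#     on={"mutation_count": mutation_count_fn, "pd_l1": pd_l1_fn},  # placeholder column functions
#     x_col="mutation_count",
#     plot_type="regplot",
#     plot_kwargs={"color": "steelblue"})
# print(results.coeff, results.p_value)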
def __init__(self, path, games, logger, suffix):
super(YesNo, self).__init__(path, self.__class__.__name__, suffix)
# basic storage for statistics
yes_no = collections.defaultdict(list)
number_yesno = collections.defaultdict(int)
MAX = 15
for i, game in enumerate(games):
if game.status == "incomplete":
continue
yn = []
for a in game.answers:
a = a.lower()
if a == "yes":
number_yesno["yes"] +=1
yn.append(1)
elif a == "no":
number_yesno["no"] += 1
yn.append(0)
else:
number_yesno["n/a"] += 1
yn.append(0.5)
no_question = len(game.answers)
yes_no[no_question].append(yn)
sns.set(style="whitegrid")
max_no_question = min(MAX, max(yes_no.keys())) + 1
fig = None
for key, yn in yes_no.items():
no_question = int(key)
yn_mean = np.array(yn).mean(axis=0)
if no_question < max_no_question :
fig = sns.regplot(x=np.arange(1, no_question + 1, 1), y=yn_mean, lowess=True, scatter=False)
#dummy legend
sns.regplot(x=np.array([-1]), y=np.array([-1]), scatter=False, line_kws={'linestyle':'-'}, label="Ratio yes-no",ci=None, color="g")
fig.legend(loc="best", fontsize='x-large')
fig.set_xlim(1, max_no_question)
fig.set_ylim(0.1, 1)
fig.set_xlabel("Number of questions", {'size': '14'})
fig.set_ylabel('Ratio yes-no', {'size': '14'})
def __init__(self, path, games, logger, suffix):
super(QuestionVsObject, self).__init__(path, self.__class__.__name__, suffix)
ratio_q_object = []
for game in games:
no_object = len(game.objects)
no_question = len(game.questions)
ratio_q_object.append([no_object,no_question])
ratio_q_object = np.array(ratio_q_object)
sns.set(style="white")
x = np.linspace(3, 20, 80)
counter = collections.defaultdict(list)
for k, val in ratio_q_object:
counter[k] += [val]
arr = np.zeros( [4, 21])
for k, val in counter.items():
if len(val) > 0:
arr[0,k] = k
arr[1,k] = np.mean(val)
# Std
arr[2, k] = np.std(val)
# confidence interval 95%
arr[3,k] = 1.95*np.std(val)/np.sqrt(len(val))
#plt.plot(arr[0,:],arr[1,:] , 'b.', label="Human behavior")
sns.regplot(x=ratio_q_object[:, 0], y=ratio_q_object[:, 1], x_ci=None, x_bins=20, order=4, label="Human behavior", marker="o", line_kws={'linestyle':'-'})
plt.fill_between(x=arr[0,:], y1=arr[1,:]-arr[2,:], y2=arr[1,:]+arr[2,:], alpha=0.2)
sns.regplot(x=x, y=np.log2(x), order=6, scatter=False, label="y = log2(x)", line_kws={'linestyle':'--'})
f = sns.regplot(x=x, y=x , order=1, scatter=False, label="y = x" , line_kws={'linestyle':'--'})
f.legend(loc="best", fontsize='x-large')
f.set_xlim(3,20)
f.set_ylim(0,20)
f.set_xlabel("Number of objects", {'size':'14'})
f.set_ylabel("Number of questions", {'size':'14'})
Source file: plots.py (project: Comparative-Annotation-Toolkit, author: ComparativeGenomicsToolkit)
def improvement_plot(consensus_data, ordered_genomes, improvement_tgt):
def do_kdeplot(x, y, ax, n_levels=None, bw='scott'):
try:
sns.kdeplot(x, y, ax=ax, cut=0, cmap='Purples_d', shade=True, shade_lowest=False, n_levels=n_levels, bw=bw,
rasterized=True)
except Exception:
logger.warning('Unable to do a KDE fit to AUGUSTUS improvement.')
with improvement_tgt.open('w') as outf, PdfPages(outf) as pdf, sns.axes_style("whitegrid"):
for genome in ordered_genomes:
data = pd.DataFrame(consensus_data[genome]['Evaluation Improvement']['changes'])
unchanged = consensus_data[genome]['Evaluation Improvement']['unchanged']
if len(data) == 0:
continue
data.columns = ['transMap original introns',
'transMap intron annotation support',
'transMap intron RNA support',
'Original introns',
'Intron annotation support',
'Intron RNA support',
'transMap alignment goodness',
'Alignment goodness']
fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(ncols=2, nrows=2)
for ax in [ax1, ax2, ax3]: # goodness plots are allowed to auto-set scale
ax.set_xlim(0, 100)
ax.set_ylim(0, 100)
goodness_min = min(data['Alignment goodness'])
ax4.set_xlim(goodness_min, 100)
ax4.set_ylim(goodness_min, 100)
do_kdeplot(data['transMap original introns'], data['Original introns'], ax1, n_levels=25, bw=2)
sns.regplot(x=data['transMap original introns'], y=data['Original introns'], ax=ax1,
color='#A9B36F', scatter_kws={"s": 3, 'alpha': 0.7, 'rasterized': True}, fit_reg=False)
do_kdeplot(data['transMap intron annotation support'], data['Intron annotation support'], ax2,
n_levels=25, bw=2)
sns.regplot(x=data['transMap intron annotation support'], y=data['Intron annotation support'], ax=ax2,
color='#A9B36F', scatter_kws={"s": 3, 'alpha': 0.7, 'rasterized': True}, fit_reg=False)
do_kdeplot(data['transMap intron RNA support'], data['Intron RNA support'], ax3, n_levels=25, bw=2)
sns.regplot(x=data['transMap intron RNA support'], y=data['Intron RNA support'], ax=ax3,
color='#A9B36F', scatter_kws={"s": 3, 'alpha': 0.7, 'rasterized': True}, fit_reg=False)
do_kdeplot(data['transMap alignment goodness'], data['Alignment goodness'], ax4, n_levels=20, bw=1)
sns.regplot(x=data['transMap alignment goodness'], y=data['Alignment goodness'], ax=ax4,
color='#A9B36F', scatter_kws={"s": 3, 'alpha': 0.7, 'rasterized': True}, fit_reg=False)
fig.suptitle('AUGUSTUS metric improvements for {:,} transcripts in {}.\n'
'{:,} transMap transcripts were chosen.'.format(len(data), genome, unchanged))
for ax in [ax1, ax2, ax3, ax4]:
ax.set(adjustable='box-forced', aspect='equal')
fig.subplots_adjust(hspace=0.3)
multipage_close(pdf, tight_layout=False)