def plot_log():
pylab.clf()
pylab.figure(num=None, figsize=(6, 5))
x = np.arange(0.001, 1, 0.001)
y = np.log(x)
pylab.title('Relationship between probabilities and their logarithm')
pylab.plot(x, y)
pylab.grid(True)
pylab.xlabel('P')
pylab.ylabel('log(P)')
filename = 'log_probs.png'
pylab.savefig(os.path.join(CHART_DIR, filename), bbox_inches="tight")
python类clf()的实例源码
utils.py 文件源码
项目:Building-Machine-Learning-Systems-With-Python-Second-Edition
作者: PacktPublishing
项目源码
文件源码
阅读 20
收藏 0
点赞 0
评论 0
utils.py 文件源码
项目:Building-Machine-Learning-Systems-With-Python-Second-Edition
作者: PacktPublishing
项目源码
文件源码
阅读 18
收藏 0
点赞 0
评论 0
def plot_feat_hist(data_name_list, filename=None):
pylab.clf()
num_rows = 1 + (len(data_name_list) - 1) / 2
num_cols = 1 if len(data_name_list) == 1 else 2
pylab.figure(figsize=(5 * num_cols, 4 * num_rows))
for i in range(num_rows):
for j in range(num_cols):
pylab.subplot(num_rows, num_cols, 1 + i * num_cols + j)
x, name = data_name_list[i * num_cols + j]
pylab.title(name)
pylab.xlabel('Value')
pylab.ylabel('Density')
# the histogram of the data
max_val = np.max(x)
if max_val <= 1.0:
bins = 50
elif max_val > 50:
bins = 50
else:
bins = max_val
n, bins, patches = pylab.hist(
x, bins=bins, normed=1, facecolor='green', alpha=0.75)
pylab.grid(True)
if not filename:
filename = "feat_hist_%s.png" % name
pylab.savefig(os.path.join(CHART_DIR, filename), bbox_inches="tight")
utils.py 文件源码
项目:Building-Machine-Learning-Systems-With-Python-Second-Edition
作者: PacktPublishing
项目源码
文件源码
阅读 17
收藏 0
点赞 0
评论 0
def log_false_positives(clf, X, y, name):
with open("FP_" + name.replace(" ", "_") + ".tsv", "w") as f:
false_positive = clf.predict(X) != y
for tweet, false_class in zip(X[false_positive], y[false_positive]):
f.write("%s\t%s\n" %
(false_class, tweet.encode("ascii", "ignore")))
utils.py 文件源码
项目:Building-Machine-Learning-Systems-With-Python-Second-Edition
作者: PacktPublishing
项目源码
文件源码
阅读 18
收藏 0
点赞 0
评论 0
def plot_pr(auc_score, name, precision, recall, label=None):
pylab.clf()
pylab.figure(num=None, figsize=(5, 4))
pylab.grid(True)
pylab.fill_between(recall, precision, alpha=0.5)
pylab.plot(recall, precision, lw=1)
pylab.xlim([0.0, 1.0])
pylab.ylim([0.0, 1.0])
pylab.xlabel('Recall')
pylab.ylabel('Precision')
pylab.title('P/R curve (AUC = %0.2f) / %s' % (auc_score, label))
filename = name.replace(" ", "_")
pylab.savefig(
os.path.join(CHART_DIR, "pr_" + filename + ".png"), bbox_inches="tight")
utils.py 文件源码
项目:Building-Machine-Learning-Systems-With-Python-Second-Edition
作者: PacktPublishing
项目源码
文件源码
阅读 21
收藏 0
点赞 0
评论 0
def show_most_informative_features(vectorizer, clf, n=20):
c_f = sorted(zip(clf.coef_[0], vectorizer.get_feature_names()))
top = zip(c_f[:n], c_f[:-(n + 1):-1])
for (c1, f1), (c2, f2) in top:
print("\t%.4f\t%-15s\t\t%.4f\t%-15s" % (c1, f1, c2, f2))
utils.py 文件源码
项目:Building-Machine-Learning-Systems-With-Python-Second-Edition
作者: PacktPublishing
项目源码
文件源码
阅读 17
收藏 0
点赞 0
评论 0
def plot_feat_hist(data_name_list, filename=None):
pylab.clf()
num_rows = 1 + (len(data_name_list) - 1) / 2
num_cols = 1 if len(data_name_list) == 1 else 2
pylab.figure(figsize=(5 * num_cols, 4 * num_rows))
for i in range(num_rows):
for j in range(num_cols):
pylab.subplot(num_rows, num_cols, 1 + i * num_cols + j)
x, name = data_name_list[i * num_cols + j]
pylab.title(name)
pylab.xlabel('Value')
pylab.ylabel('Density')
# the histogram of the data
max_val = np.max(x)
if max_val <= 1.0:
bins = 50
elif max_val > 50:
bins = 50
else:
bins = max_val
n, bins, patches = pylab.hist(
x, bins=bins, normed=1, facecolor='green', alpha=0.75)
pylab.grid(True)
if not filename:
filename = "feat_hist_%s.png" % name
pylab.savefig(os.path.join(CHART_DIR, filename), bbox_inches="tight")
utils.py 文件源码
项目:Building-Machine-Learning-Systems-With-Python-Second-Edition
作者: PacktPublishing
项目源码
文件源码
阅读 17
收藏 0
点赞 0
评论 0
def plot_bias_variance(data_sizes, train_errors, test_errors, name):
pylab.clf()
pylab.ylim([0.0, 1.0])
pylab.xlabel('Data set size')
pylab.ylabel('Error')
pylab.title("Bias-Variance for '%s'" % name)
pylab.plot(
data_sizes, train_errors, "-", data_sizes, test_errors, "--", lw=1)
pylab.legend(["train error", "test error"], loc="upper right")
pylab.grid(True)
pylab.savefig(os.path.join(CHART_DIR, "bv_" + name + ".png"))
def plot_forest_all_proba(y_proba_all, y_gt):
from matplotlib import pylab
N = len(y_gt)
num_tree = len(y_proba_all)
pylab.clf()
mat = np.zeros((num_tree, N))
LOGGER.info('mat.shape={}'.format(mat.shape))
for i in range(num_tree):
mat[i,:] = y_proba_all[i][(range(N), y_gt)]
pylab.matshow(mat, fignum=False, cmap='Blues', vmin=0, vmax=1.0)
pylab.grid(False)
pylab.show()
def twohists(x1,x2,xmin,xmax,range=None,x1leg='$x_1$',x2leg='$x_2$',xlabel='',fig=1,sharey=False,fontsize=12,bins1=10,bins2=10):
"""
Script that plots two histograms of quantities x1 and x2
sharing the same X-axis.
:param x1,x2: arrays with data to be plotted
:param xmin,xmax: lower and upper range of plotted values, will be used to set a consistent x-range
for both histograms.
:param x1leg, x2leg: legends for each histogram
:param xlabel: self-explanatory.
:param bins1,bins2: number of bins in each histogram
:param fig: which plot window should I use?
:param range: in the form (xmin,xmax), same as range argument for hist and applied to both
histograms.
Inspired by `Scipy <http://www.scipy.org/Cookbook/Matplotlib/Multiple_Subplots_with_One_Axis_Label>`_.
"""
pylab.rcParams.update({'font.size': fontsize})
fig=pylab.figure(fig)
pylab.clf()
a=fig.add_subplot(2,1,1)
if sharey==True:
b=fig.add_subplot(2,1,2, sharex=a, sharey=a)
else:
b=fig.add_subplot(2,1,2, sharex=a)
a.hist(x1,bins1,label=x1leg,color='b',histtype='stepfilled',range=range)
a.legend(loc='best',frameon=False)
a.set_xlim(xmin,xmax)
b.hist(x2,bins2,label=x2leg,color='r',histtype='stepfilled',range=range)
b.legend(loc='best',frameon=False)
pylab.setp(a.get_xticklabels(), visible=False)
b.set_xlabel(xlabel)
b.set_ylabel('Number',verticalalignment='bottom')
pylab.minorticks_on()
pylab.subplots_adjust(hspace=0.15)
pylab.draw()
pylab.show()
def threehists(x1,x2,x3,xmin,xmax,x1leg='$x_1$',x2leg='$x_2$',x3leg='$x_3$',xlabel='',fig=1,sharey=False,fontsize=12):
"""
Script that plots three histograms of quantities x1, x2 and x3
sharing the same X-axis.
Arguments:
- x1,x2,x3: arrays with data to be plotted
- xmin,xmax: lower and upper range of plotted values, will be used to set a consistent x-range for both histograms.
- x1leg, x2leg, x3leg: legends for each histogram
- xlabel: self-explanatory.
- sharey: sharing the Y-axis among the histograms?
- fig: which plot window should I use?
Example:
x1=Lbol(AD), x2=Lbol(JD), x3=Lbol(EHF10)
>>> threehists(x1,x2,x3,38,44,'AD','JD','EHF10','$\log L_{\\rm bol}$ (erg s$^{-1}$)',sharey=True)
Inspired by `Scipy <http://www.scipy.org/Cookbook/Matplotlib/Multiple_Subplots_with_One_Axis_Label>`_.
"""
pylab.rcParams.update({'font.size': fontsize})
fig=pylab.figure(fig)
pylab.clf()
a=fig.add_subplot(3,1,1)
if sharey==True:
b=fig.add_subplot(3,1,2, sharex=a, sharey=a)
c=fig.add_subplot(3,1,3, sharex=a, sharey=a)
else:
b=fig.add_subplot(3,1,2, sharex=a)
c=fig.add_subplot(3,1,3, sharex=a)
a.hist(x1,label=x1leg,color='b',histtype='stepfilled')
a.legend(loc='best',frameon=False)
a.set_xlim(xmin,xmax)
b.hist(x2,label=x2leg,color='r',histtype='stepfilled')
b.legend(loc='best',frameon=False)
c.hist(x3,label=x3leg,color='y',histtype='stepfilled')
c.legend(loc='best',frameon=False)
pylab.setp(a.get_xticklabels(), visible=False)
pylab.setp(b.get_xticklabels(), visible=False)
c.set_xlabel(xlabel)
b.set_ylabel('Number')
pylab.minorticks_on()
pylab.subplots_adjust(hspace=0.15)
pylab.draw()
pylab.show()
def fourcumplot(x1,x2,x3,x4,xmin,xmax,x1leg='$x_1$',x2leg='$x_2$',x3leg='$x_3$',x4leg='$x_3$',xlabel='',ylabel='$N(x>x\')$',fig=1,sharey=False,fontsize=12,bins1=50,bins2=50,bins3=50,bins4=50):
"""
Script that plots the cumulative histograms of four variables x1, x2, x3 and x4
sharing the same X-axis. For each bin, Y is the fraction of the sample
with values above X.
Arguments:
- x1,x2,x3,x4: arrays with data to be plotted
- xmin,xmax: lower and upper range of plotted values, will be used to set a consistent x-range
for both histograms.
- x1leg, x2leg, x3leg, x4leg: legends for each histogram
- xlabel: self-explanatory.
- sharey: sharing the Y-axis among the histograms?
- bins1,bins2,...: number of bins in each histogram
- fig: which plot window should I use?
Inspired by `Scipy <http://www.scipy.org/Cookbook/Matplotlib/Multiple_Subplots_with_One_Axis_Label>`_.
v1 Jun. 2012: inherited from fourhists.
"""
pylab.rcParams.update({'font.size': fontsize})
fig=pylab.figure(fig)
pylab.clf()
a=fig.add_subplot(4,1,1)
if sharey==True:
b=fig.add_subplot(4,1,2, sharex=a, sharey=a)
c=fig.add_subplot(4,1,3, sharex=a, sharey=a)
d=fig.add_subplot(4,1,4, sharex=a, sharey=a)
else:
b=fig.add_subplot(4,1,2, sharex=a)
c=fig.add_subplot(4,1,3, sharex=a)
d=fig.add_subplot(4,1,4, sharex=a)
a.hist(x1,bins1,label=x1leg,color='b',cumulative=-True,normed=True,histtype='stepfilled')
a.legend(loc='best',frameon=False)
a.set_xlim(xmin,xmax)
b.hist(x2,bins2,label=x2leg,color='r',cumulative=-True,normed=True,histtype='stepfilled')
b.legend(loc='best',frameon=False)
c.hist(x3,bins3,label=x3leg,color='y',cumulative=-True,normed=True,histtype='stepfilled')
c.legend(loc='best',frameon=False)
d.hist(x4,bins4,label=x4leg,color='g',cumulative=-True,normed=True,histtype='stepfilled')
d.legend(loc='best',frameon=False)
pylab.setp(a.get_xticklabels(), visible=False)
pylab.setp(b.get_xticklabels(), visible=False)
pylab.setp(c.get_xticklabels(), visible=False)
d.set_xlabel(xlabel)
c.set_ylabel(ylabel)
pylab.minorticks_on()
pylab.subplots_adjust(hspace=0.15)
pylab.draw()
pylab.show()
def plotPolicy(self, policy, prefix):
plt.clf()
for idx in xrange(len(policy)):
i, j = self.env.getStateXY(idx)
dx = 0
dy = 0
if policy[idx] == 0: # up
dy = 0.35
elif policy[idx] == 1: #right
dx = 0.35
elif policy[idx] == 2: #down
dy = -0.35
elif policy[idx] == 3: #left
dx = -0.35
elif self.matrixMDP[i][j] != -1 and policy[idx] == 4: # termination
circle = plt.Circle(
(j + 0.5, self.numRows - i + 0.5 - 1), 0.025, color='k')
plt.gca().add_artist(circle)
if self.matrixMDP[i][j] != -1:
plt.arrow(j + 0.5, self.numRows - i + 0.5 - 1, dx, dy,
head_width=0.05, head_length=0.05, fc='k', ec='k')
else:
plt.gca().add_patch(
patches.Rectangle(
(j, self.numRows - i - 1), # (x,y)
1.0, # width
1.0, # height
facecolor = "gray"
)
)
plt.xlim([0, self.numCols])
plt.ylim([0, self.numRows])
for i in xrange(self.numCols):
plt.axvline(i, color='k', linestyle=':')
plt.axvline(self.numCols, color='k', linestyle=':')
for j in xrange(self.numRows):
plt.axhline(j, color='k', linestyle=':')
plt.axhline(self.numRows, color='k', linestyle=':')
plt.savefig(self.outputPath + prefix + 'policy.png')
plt.close()
demo_mi.py 文件源码
项目:Building-Machine-Learning-Systems-With-Python-Second-Edition
作者: PacktPublishing
项目源码
文件源码
阅读 20
收藏 0
点赞 0
评论 0
def plot_mi_demo():
np.random.seed(0) # to reproduce the data later on
pylab.clf()
pylab.figure(num=None, figsize=(8, 8))
x = np.arange(0, 10, 0.2)
pylab.subplot(221)
y = 0.5 * x + norm.rvs(1, scale=.01, size=len(x))
_plot_mi_func(x, y)
pylab.subplot(222)
y = 0.5 * x + norm.rvs(1, scale=.1, size=len(x))
_plot_mi_func(x, y)
pylab.subplot(223)
y = 0.5 * x + norm.rvs(1, scale=1, size=len(x))
_plot_mi_func(x, y)
pylab.subplot(224)
y = norm.rvs(1, scale=10, size=len(x))
_plot_mi_func(x, y)
pylab.autoscale(tight=True)
pylab.grid(True)
filename = "mi_demo_1.png"
pylab.savefig(os.path.join(CHART_DIR, filename), bbox_inches="tight")
pylab.clf()
pylab.figure(num=None, figsize=(8, 8))
x = np.arange(-5, 5, 0.2)
pylab.subplot(221)
y = 0.5 * x ** 2 + norm.rvs(1, scale=.01, size=len(x))
_plot_mi_func(x, y)
pylab.subplot(222)
y = 0.5 * x ** 2 + norm.rvs(1, scale=.1, size=len(x))
_plot_mi_func(x, y)
pylab.subplot(223)
y = 0.5 * x ** 2 + norm.rvs(1, scale=1, size=len(x))
_plot_mi_func(x, y)
pylab.subplot(224)
y = 0.5 * x ** 2 + norm.rvs(1, scale=10, size=len(x))
_plot_mi_func(x, y)
pylab.autoscale(tight=True)
pylab.grid(True)
filename = "mi_demo_2.png"
pylab.savefig(os.path.join(CHART_DIR, filename), bbox_inches="tight")
demo_corr.py 文件源码
项目:Building-Machine-Learning-Systems-With-Python-Second-Edition
作者: PacktPublishing
项目源码
文件源码
阅读 21
收藏 0
点赞 0
评论 0
def plot_correlation_demo():
np.random.seed(0) # to reproduce the data later on
pylab.clf()
pylab.figure(num=None, figsize=(8, 8))
x = np.arange(0, 10, 0.2)
pylab.subplot(221)
y = 0.5 * x + norm.rvs(1, scale=.01, size=len(x))
_plot_correlation_func(x, y)
pylab.subplot(222)
y = 0.5 * x + norm.rvs(1, scale=.1, size=len(x))
_plot_correlation_func(x, y)
pylab.subplot(223)
y = 0.5 * x + norm.rvs(1, scale=1, size=len(x))
_plot_correlation_func(x, y)
pylab.subplot(224)
y = norm.rvs(1, scale=10, size=len(x))
_plot_correlation_func(x, y)
pylab.autoscale(tight=True)
pylab.grid(True)
filename = "corr_demo_1.png"
pylab.savefig(os.path.join(CHART_DIR, filename), bbox_inches="tight")
pylab.clf()
pylab.figure(num=None, figsize=(8, 8))
x = np.arange(-5, 5, 0.2)
pylab.subplot(221)
y = 0.5 * x ** 2 + norm.rvs(1, scale=.01, size=len(x))
_plot_correlation_func(x, y)
pylab.subplot(222)
y = 0.5 * x ** 2 + norm.rvs(1, scale=.1, size=len(x))
_plot_correlation_func(x, y)
pylab.subplot(223)
y = 0.5 * x ** 2 + norm.rvs(1, scale=1, size=len(x))
_plot_correlation_func(x, y)
pylab.subplot(224)
y = 0.5 * x ** 2 + norm.rvs(1, scale=10, size=len(x))
_plot_correlation_func(x, y)
pylab.autoscale(tight=True)
pylab.grid(True)
filename = "corr_demo_2.png"
pylab.savefig(os.path.join(CHART_DIR, filename), bbox_inches="tight")
demo_pca.py 文件源码
项目:Building-Machine-Learning-Systems-With-Python-Second-Edition
作者: PacktPublishing
项目源码
文件源码
阅读 18
收藏 0
点赞 0
评论 0
def plot_simple_demo_1():
pylab.clf()
fig = pylab.figure(num=None, figsize=(10, 4))
pylab.subplot(121)
title = "Original feature space"
pylab.title(title)
pylab.xlabel("$X_1$")
pylab.ylabel("$X_2$")
x1 = np.arange(0, 10, .2)
x2 = x1 + np.random.normal(scale=1, size=len(x1))
good = (x1 > 5) | (x2 > 5)
bad = ~good
x1g = x1[good]
x2g = x2[good]
pylab.scatter(x1g, x2g, edgecolor="blue", facecolor="blue")
x1b = x1[bad]
x2b = x2[bad]
pylab.scatter(x1b, x2b, edgecolor="red", facecolor="white")
pylab.grid(True)
pylab.subplot(122)
X = np.c_[(x1, x2)]
pca = decomposition.PCA(n_components=1)
Xtrans = pca.fit_transform(X)
Xg = Xtrans[good]
Xb = Xtrans[bad]
pylab.scatter(
Xg[:, 0], np.zeros(len(Xg)), edgecolor="blue", facecolor="blue")
pylab.scatter(
Xb[:, 0], np.zeros(len(Xb)), edgecolor="red", facecolor="white")
title = "Transformed feature space"
pylab.title(title)
pylab.xlabel("$X'$")
fig.axes[1].get_yaxis().set_visible(False)
print(pca.explained_variance_ratio_)
pylab.grid(True)
pylab.autoscale(tight=True)
filename = "pca_demo_1.png"
pylab.savefig(os.path.join(CHART_DIR, filename), bbox_inches="tight")
demo_pca.py 文件源码
项目:Building-Machine-Learning-Systems-With-Python-Second-Edition
作者: PacktPublishing
项目源码
文件源码
阅读 16
收藏 0
点赞 0
评论 0
def plot_simple_demo_2():
pylab.clf()
fig = pylab.figure(num=None, figsize=(10, 4))
pylab.subplot(121)
title = "Original feature space"
pylab.title(title)
pylab.xlabel("$X_1$")
pylab.ylabel("$X_2$")
x1 = np.arange(0, 10, .2)
x2 = x1 + np.random.normal(scale=1, size=len(x1))
good = x1 > x2
bad = ~good
x1g = x1[good]
x2g = x2[good]
pylab.scatter(x1g, x2g, edgecolor="blue", facecolor="blue")
x1b = x1[bad]
x2b = x2[bad]
pylab.scatter(x1b, x2b, edgecolor="red", facecolor="white")
pylab.grid(True)
pylab.subplot(122)
X = np.c_[(x1, x2)]
pca = decomposition.PCA(n_components=1)
Xtrans = pca.fit_transform(X)
Xg = Xtrans[good]
Xb = Xtrans[bad]
pylab.scatter(
Xg[:, 0], np.zeros(len(Xg)), edgecolor="blue", facecolor="blue")
pylab.scatter(
Xb[:, 0], np.zeros(len(Xb)), edgecolor="red", facecolor="white")
title = "Transformed feature space"
pylab.title(title)
pylab.xlabel("$X'$")
fig.axes[1].get_yaxis().set_visible(False)
print(pca.explained_variance_ratio_)
pylab.grid(True)
pylab.autoscale(tight=True)
filename = "pca_demo_2.png"
pylab.savefig(os.path.join(CHART_DIR, filename), bbox_inches="tight")