def pca_analysis(self):
if not self._use_pca:
return
print "done.\n + Using PCA to analyze the data...",; stdout.flush()
cols = self._get_columns()
(X_train, _) = self._train_data
if not self._pca:
self._pca = RandomizedPCA(
n_components=self._pca_max_n,
whiten=True,
random_state=42)
self._pca.fit(X_train)
# NOTE: plot code stolen from sklearn example: http://bit.ly/1X8ZsUw
fig = plt.figure(self._fig_count, figsize=(4,3))
plt.clf()
plt.axes([.2, .2, .7, .7])
plt.plot(self._pca.explained_variance_ratio_)
fig.suptitle('RandomizedPCA Analysis')
plt.axis('tight')
plt.xlabel('Component')
plt.ylabel('Explained Variance Ratio')
plt.show()
self._fig_count += 1
# Reset the PCA object, since we will need to set the exact number
# of components we want to use if and when we use it again
self._pca = None
# Train a classifier pipeline that may or may not use PCA or other
# feature selection methods
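# [Web-page residue, not part of the program] "MLNPCapstone.py" source file; language: python; views: 25; favorites: 0; likes: 0; comments: 0; comment list; table of contents.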