MLNPCapstone.py 文件源码-python代码片段

MLNPCapstone.py 文件源码

python

阅读 25 收藏 0 点赞 0 评论 0

项目：machine-learning-nanodegree-program-capstone 作者: harrylippy 项目源码文件源码

def pca_analysis(self):
        if not self._use_pca:
            return

        print "done.\n + Using PCA to analyze the data...",; stdout.flush()

        cols = self._get_columns()

        (X_train, _) = self._train_data
        if not self._pca:
            self._pca = RandomizedPCA(
                            n_components=self._pca_max_n, 
                            whiten=True,
                            random_state=42)
            self._pca.fit(X_train)

        # NOTE:  plot code stolen from sklearn example: http://bit.ly/1X8ZsUw
        fig = plt.figure(self._fig_count, figsize=(4,3))
        plt.clf()
        plt.axes([.2, .2, .7, .7])
        plt.plot(self._pca.explained_variance_ratio_)
        fig.suptitle('RandomizedPCA Analysis')
        plt.axis('tight')
        plt.xlabel('Component')
        plt.ylabel('Explained Variance Ratio')
        plt.show()
        self._fig_count += 1

        # Reset the PCA object, since we will need to set the exact number
        # of components we want to use if and when we use it again
        self._pca = None

    # Train a classifier pipeline that may or may not use PCA or other
    # feature selection methods