linear_regression.py 文件源码-python代码片段

linear_regression.py 文件源码

python

阅读 26 收藏 0 点赞 0 评论 0

项目：astrology 作者: mattsgithub 项目源码文件源码

def plot_f_distrib_for_many_coefficients(self, features):
        from scipy.stats import f

        # Remove a particular subset of features
        X = np.delete(self.X, [self.features.index(_) for _ in features], 1)

        # Prediction from reduced model
        XT = X.T
        std_error_matrix = inv(XT.dot(X))
        beta = std_error_matrix.dot(XT).dot(self.y)
        y_hat = X.dot(beta)
        rss_reduced_model = np.sum((self.y - y_hat)**2)

        dfn = len(features)
        dfd = self.df

        # This should be distributed as chi squared
        # with degrees of freedom equal to number
        # of dropped features
        rss_diff = (rss_reduced_model - self.rss)
        chi_1 = rss_diff / dfn
        chi_2 = self.pop_var
        f_score = chi_1 / chi_2

        # 5% and 95% percentile
        f_05, f_95 = f.ppf([0.05, 0.95], dfn, dfd)

        x = np.linspace(0.001, 5.0)

        plt.axvline(x=f_05)
        plt.axvline(x=f_95)

        plt.scatter(f_score, f.pdf(f_score, dfn, dfd), marker='o', color='red')
        plt.plot(x, f.pdf(x, dfn, dfd), color='gray', lw=5, alpha=0.6)
        plt.title('f-distribtion for dropping features: {0}'.format(features))
        plt.show()