# helpers.py — source file (Python code snippet)

def pca_fit(X, var_ratio=1, return_transform=True):
    """Compute the principal components of X and optionally project X onto them.

    The components are the eigenvectors of the covariance matrix of `X`, ordered by decreasing eigenvalue. When
    `var_ratio` is not 1, only the smallest number of leading components whose cumulative share of the total
    variance reaches `var_ratio` is kept.

    Parameters
    ----------
    X : array_like
        An array of data samples with shape (n_samples, n_features).
    var_ratio : float
        The variance ratio to be captured (Default value = 1). If no number of components reaches the requested
        ratio (e.g. `var_ratio` > 1), all components are kept.
    return_transform : bool
        Whether to apply the transformation to the given data.

    Returns
    -------
    array_like
        If return_transform is True, an array with shape (n_samples, n_components) which is the input samples projected
        onto `n_components` principal components. Otherwise the first `n_components` eigenvectors of the covariance
        matrix corresponding to the `n_components` largest eigenvalues are returned as rows.

    Notes
    -----
    The mean is removed only when estimating the covariance; the returned projection is `X.dot(L.T)` applied to
    the data as given (i.e. `X` itself is not centered before projecting).

    """

    X = np.asarray(X)  # Accept any array_like (lists, tuples), as documented

    # np.cov removes the mean; atleast_2d keeps the single-feature case (0-d covariance) valid for eigh
    cov_ = np.atleast_2d(np.cov(X, rowvar=False))
    evals, evecs = LA.eigh(cov_)  # Get eigenvalues in ascending order, eigenvectors in columns
    evecs = np.fliplr(evecs)  # Flip eigenvectors to get them in descending eigenvalue order

    if var_ratio == 1:
        L = evecs.T
    else:
        evals = np.flip(evals, axis=0)
        var_exp = np.cumsum(evals)
        var_exp = var_exp / var_exp[-1]
        reached = np.greater_equal(var_exp, var_ratio)
        if reached.any():
            # argmax gives the zero-based index of the first component reaching the ratio,
            # so the number of components to keep is that index plus one.
            n_components = int(np.argmax(reached)) + 1
        else:
            # Requested ratio is never reached: keep every component rather than none.
            n_components = len(var_exp)
        L = evecs.T[:n_components]  # Set the first n_components eigenvectors as rows of L

    if return_transform:
        return X.dot(L.T)
    else:
        return L