def decision_function(self, X):
"""Compute the distances to the nearest centroid for
an array of test vectors X.
Parameters
----------
X : array-like, shape = [n_samples, n_features]
Returns
-------
C : array, shape = [n_samples]
Distance of each sample to its nearest centroid.
"""
from sklearn.metrics.pairwise import pairwise_distances
from sklearn.utils.validation import check_array, check_is_fitted
check_is_fitted(self, 'centroids_')
X = check_array(X, accept_sparse='csr')
return pairwise_distances(X, self.centroids_,
metric=self.metric).min(axis=1)
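# A minimal usage sketch (toy data, made-up centroids) showing how
# pairwise_distances(...).min(axis=1) in decision_function yields each
# sample's distance to its nearest centroid.
import numpy as np
from sklearn.metrics.pairwise import pairwise_distances

X_demo = np.array([[0.0, 0.0], [3.0, 4.0]])
centroids_demo = np.array([[0.0, 1.0], [3.0, 0.0]])
# each row of the distance matrix holds one sample's distance to every
# centroid; min(axis=1) keeps only the nearest one
d = pairwise_distances(X_demo, centroids_demo, metric='euclidean').min(axis=1)
# d == [1.0, 4.0]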
def from_array(X, column_names=None):
"""A simple wrapper for H2OFrame.from_python. This takes a
numpy array (1d or 2d) and returns an H2OFrame with all
the default args.
Parameters
----------
X : ndarray
The array to convert.
column_names : list, tuple (default=None)
The names to use for the columns.
Returns
-------
H2OFrame
"""
X = check_array(X, force_all_finite=False)
return from_pandas(pd.DataFrame.from_records(data=X, columns=column_names))
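# A hedged sketch of the conversion path above without an H2O cluster:
# check_array(force_all_finite=False) lets NaN through, and the validated
# ndarray is wrapped in a pandas DataFrame before H2O ingests it. The
# column names here are made up.
import numpy as np
import pandas as pd
from sklearn.utils.validation import check_array

X_demo = np.array([[1.0, np.nan], [3.0, 4.0]])
X_checked = check_array(X_demo, force_all_finite=False)  # NaN passes through
df = pd.DataFrame.from_records(data=X_checked, columns=['a', 'b'])
# from_array(X_demo, column_names=['a', 'b']) would hand this frame to H2O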
def transform(self, X):
check_is_fitted(self, ['statistics_', 'estimators_', 'gamma_'])
X = check_array(X, copy=True, dtype=np.float64, force_all_finite=False)
if X.shape[1] != self.statistics_.shape[1]:
raise ValueError("X has %d features per sample, expected %d"
% (X.shape[1], self.statistics_.shape[1]))
X_nan = np.isnan(X)
imputed = self.initial_imputer.fit_transform(X)
if len(self.estimators_) > 1:
for i, estimator_ in enumerate(self.estimators_):
X_s = np.delete(imputed, i, 1)
y_nan = X_nan[:, i]
X_unk = X_s[y_nan]
if len(X_unk) > 0:
X[y_nan, i] = estimator_.predict(X_unk)
else:
estimator_ = self.estimators_[0]
X[X_nan] = estimator_.inverse_transform(estimator_.transform(imputed))[X_nan]
return X
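# A hedged sketch of the per-column loop above (toy data, no real estimator):
# for column i, the other columns act as features and only the rows where
# column i was NaN are sent to the estimator for prediction.
import numpy as np

X_orig = np.array([[1.0, 10.0], [2.0, np.nan], [3.0, 30.0]])
X_nan = np.isnan(X_orig)
imputed = np.where(X_nan, 0.0, X_orig)  # stand-in for initial_imputer output
i = 1
X_s = np.delete(imputed, i, 1)   # features: every column except i
y_nan = X_nan[:, i]              # rows where column i was missing
X_unk = X_s[y_nan]               # one feature row per missing value
# estimator_.predict(X_unk) would fill X_orig[y_nan, i] here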
def predict(self, X):
"""Applies learned event segmentation to new testing dataset
Alternative function for segmenting a new dataset after using
fit() to learn a sequence of events, to comply with the sklearn
Classifier interface
Parameters
----------
X: timepoint by voxel ndarray
fMRI data to segment based on previously-learned event patterns
Returns
-------
Event label for each timepoint
"""
check_is_fitted(self, ["event_pat_", "event_var_"])
X = check_array(X)
segments, test_ll = self.find_events(X)
return np.argmax(segments, axis=1)
def transform(self, X):
"""Scaling features of X according to feature_range.
Parameters
----------
X : array-like with shape [n_samples, n_features]
Input data that will be transformed.
"""
check_is_fitted(self, 'scale_')
X = check_array(X, accept_sparse="csc", copy=self.copy,
dtype=np.float32)
if sparse.issparse(X):
for i in range(X.shape[1]):
X.data[X.indptr[i]:X.indptr[i + 1]] *= self.scale_[i]
X.data[X.indptr[i]:X.indptr[i + 1]] += self.min_[i]
else:
X *= self.scale_
X += self.min_
return X
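# A hedged sketch of the sparse branch above: in CSC format,
# indptr[i]:indptr[i + 1] slices the stored values of column i, so the loop
# scales and shifts each column in place. Toy matrix; scale_demo and
# min_demo are made up. Note that, as in the code above, the shift only
# touches stored (nonzero) entries.
import numpy as np
from scipy import sparse

X_demo = sparse.csc_matrix(np.array([[1.0, 0.0], [0.0, 2.0]]))
scale_demo = np.array([10.0, 100.0])
min_demo = np.array([0.5, 0.25])
for i in range(X_demo.shape[1]):
    X_demo.data[X_demo.indptr[i]:X_demo.indptr[i + 1]] *= scale_demo[i]
    X_demo.data[X_demo.indptr[i]:X_demo.indptr[i + 1]] += min_demo[i]
# X_demo.toarray() -> [[10.5, 0.0], [0.0, 200.25]]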
def predict(self, X):
""" A reference implementation of a prediction for a classifier.
Parameters
----------
X : array-like of shape = [n_samples, n_features]
The input samples.
Returns
-------
y : array of int of shape = [n_samples]
The label for each sample is the label of the closest sample
seen during fit.
"""
# Check that fit has been called
check_is_fitted(self, ['X_', 'y_'])
# Input validation
X = check_array(X)
closest = np.argmin(euclidean_distances(X, self.X_), axis=1)
return self.y_[closest]
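# A self-contained sketch of the 1-nearest-sample rule above, with toy
# arrays standing in for the fitted self.X_ and self.y_.
import numpy as np
from sklearn.metrics.pairwise import euclidean_distances

X_train = np.array([[0.0, 0.0], [10.0, 10.0]])
y_train = np.array([0, 1])
X_test = np.array([[1.0, 1.0], [9.0, 9.0]])
closest = np.argmin(euclidean_distances(X_test, X_train), axis=1)
# y_train[closest] == [0, 1]: each test point takes the label of the
# nearest training sample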
def fit(self, X, y=None):
"""A reference implementation of a fitting function for a transformer.
Parameters
----------
X : array-like or sparse matrix of shape = [n_samples, n_features]
The training input samples.
y : None
There is no need for a target in a transformer, yet the pipeline API
requires this parameter.
Returns
-------
self : object
Returns self.
"""
X = check_array(X)
self.input_shape_ = X.shape
# Return the transformer
return self
def predict_proba(self, X, X2):
"""
Returns the probability of class 1 for each x in X.
"""
try:
getattr(self, "intercept1_")
getattr(self, "intercept2_")
getattr(self, "coef1_")
getattr(self, "coef2_")
except AttributeError:
raise RuntimeError("You must train classifer before predicting data!")
X = check_array(X)
X2 = check_array(X2)
if self.fit_first_intercept:
X = np.insert(X, 0, 1, axis=1)
if self.fit_second_intercept:
X2 = np.insert(X2, 0, 1, axis=1)
w = np.insert(self.coef1_, 0, self.intercept1_)
w2 = np.insert(self.coef2_, 0, self.intercept2_)
return (invlogit_vect(np.dot(w, np.transpose(X))) *
invlogit_vect(np.dot(w2, np.transpose(X2))))
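# A hedged sketch of the probability computation above. invlogit_vect is
# assumed to be a vectorized logistic sigmoid; invlogit_demo below is a
# hypothetical stand-in. np.insert(X, 0, 1, axis=1) prepends a column of
# ones so one dot product covers both intercept and coefficients.
import numpy as np

def invlogit_demo(z):
    return 1.0 / (1.0 + np.exp(-z))

X_demo = np.array([[0.5], [2.0]])
coef_demo, intercept_demo = np.array([1.0]), -1.0
Xb = np.insert(X_demo, 0, 1, axis=1)         # [[1.0, 0.5], [1.0, 2.0]]
w = np.insert(coef_demo, 0, intercept_demo)  # [-1.0, 1.0]
p = invlogit_demo(np.dot(w, np.transpose(Xb)))
# the two-view model above multiplies two such sigmoid terms elementwise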
def predict_proba(self, X):
"""
Returns the probability of class 1 for each x in X.
"""
try:
getattr(self, "intercept_")
getattr(self, "coef_")
except AttributeError:
raise RuntimeError("You must train classifer before predicting data!")
X = check_array(X)
if self.fit_intercept:
X = np.insert(X, 0, 1, axis=1)
w = np.insert(self.coef_, 0, self.intercept_)
return invlogit_vect(np.dot(w, np.transpose(X)))
def fit(self, X, y, **fit_params):
assert len(X) == len(y)
if self.check_X is not None:
assert self.check_X(X)
if self.check_y is not None:
assert self.check_y(y)
self.classes_ = np.unique(check_array(y, ensure_2d=False,
allow_nd=True))
if self.expected_fit_params:
missing = set(self.expected_fit_params) - set(fit_params)
assert len(missing) == 0, ('Expected fit parameter(s) %s not '
'seen.' % list(missing))
for key, value in fit_params.items():
assert len(value) == len(X), ('Fit parameter %s has length %d; '
'expected %d.' % (key, len(value),
len(X)))
return self
def predict(self, X):
"""
Predict class value for X.
:param X: {array-like, sparse matrix}, shape (n_samples, n_features). Input data, where `n_samples` is the
number of samples and `n_features` is the number of features.
:return: array of shape = [n_samples]. The predicted class label for each sample.
"""
# Numpy
X = np.array(X)
# Check that fit has been called
check_is_fitted(self, ['X_', 'y_'])
# Input validation
X = check_array(X)
return np.argmax(self.model.predict(X, verbose=self.verbose), axis=1)
def predict_proba(self, X):
"""
Predict class probabilities for X.
:param X: {array-like, sparse matrix}, shape (n_samples, n_features). Input data, where `n_samples` is the
number of samples and `n_features` is the number of features.
:return: array of shape = [n_samples, n_classes]. The predicted class probabilities for each sample.
"""
# Numpy
X = np.array(X)
# Check that fit has been called
check_is_fitted(self, ['X_', 'y_'])
# Input validation
X = check_array(X)
return self.model.predict_proba(X, verbose=self.verbose)
def _validate_X_predict(self, X, check_input):
"""Validate X whenever one tries to predict, apply, predict_proba"""
if self.tree_ is None:
raise NotFittedError("Estimator not fitted, "
"call `fit` before exploiting the model.")
if check_input:
X = check_array(X, dtype=DTYPE, accept_sparse="csr")
if issparse(X) and (X.indices.dtype != np.intc or
X.indptr.dtype != np.intc):
raise ValueError("No support for np.int64 index based "
"sparse matrices")
n_features = X.shape[1]
if self.n_features_ != n_features:
raise ValueError("Number of features of the model must "
"match the input. Model n_features is %s and "
"input n_features is %s "
% (self.n_features_, n_features))
return X
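# A hedged sketch of the sparse-index check above: scipy builds CSR
# matrices with np.intc (int32) indices by default, and the validator
# rejects int64-indexed matrices, which the tree code cannot consume.
import numpy as np
from scipy.sparse import csr_matrix, issparse

X_demo = csr_matrix(np.array([[0.0, 1.0], [2.0, 0.0]], dtype=np.float32))
if issparse(X_demo) and (X_demo.indices.dtype != np.intc or
                         X_demo.indptr.dtype != np.intc):
    raise ValueError("No support for np.int64 index based sparse matrices")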
def _labels_cost(X, centroids):
"""Calculate labels and cost function given a matrix of points and
a list of centroids for the k-modes algorithm.
"""
X = check_array(X, dtype="object")
npoints = X.shape[0]
cost = 0.
labels = np.empty(npoints, dtype='int64')
for ipoint, curpoint in enumerate(X):
diss = matching_dissim(centroids, curpoint)
clust = np.argmin(diss)
labels[ipoint] = clust
cost += diss[clust]
return labels, cost
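# A hedged sketch of the assignment step above. matching_dissim is assumed
# to be the k-modes matching dissimilarity (count of mismatched categorical
# attributes); matching_dissim_demo is a hypothetical stand-in.
import numpy as np

def matching_dissim_demo(centroids, point):
    return np.sum(centroids != point, axis=1)

centroids_demo = np.array([['a', 'x'], ['b', 'y']], dtype=object)
curpoint = np.array(['a', 'x'], dtype=object)
diss = matching_dissim_demo(centroids_demo, curpoint)  # [0, 2]
clust = np.argmin(diss)  # closest centroid: index 0, at cost diss[0]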
def transform(self, X=None):
"""Applies the learned transformation to the inputs.
Parameters
----------
X : array_like
An array of data samples with shape (n_samples, n_features_in) (default: None, defined when fit is called).
Returns
-------
array_like
An array of transformed data samples with shape (n_samples, n_features_out).
"""
if X is None:
X = self.X_
else:
X = check_array(X)
return X.dot(self.L_.T)
def inverse_transform(self, X):
"""Undo the scaling of X according to feature_range.
Parameters
----------
X : array-like with shape [n_samples, n_features]
Input data that will be transformed.
"""
check_is_fitted(self, 'scale_')
X = check_array(X, copy=self.copy, accept_sparse="csc", ensure_2d=False)
X -= self.min_
X /= self.scale_
return X
def transform(self, X, y=None, copy=None):
"""Perform standardization by centering and scaling
Parameters
----------
X : array-like with shape [n_samples, n_features]
The data used to scale along the features axis.
"""
check_is_fitted(self, 'std_')
copy = copy if copy is not None else self.copy
X = check_array(X, copy=copy, accept_sparse="csc",
dtype=np.float32, ensure_2d=False)
if sparse.issparse(X):
if self.center_sparse:
for i in range(X.shape[1]):
X.data[X.indptr[i]:X.indptr[i + 1]] -= self.mean_[i]
elif self.with_mean:
raise ValueError(
"Cannot center sparse matrices: pass `with_mean=False` "
"instead. See docstring for motivation and alternatives.")
else:
pass
if self.std_ is not None:
inplace_column_scale(X, 1 / self.std_)
else:
if self.with_mean:
X -= self.mean_
if self.with_std:
X /= self.std_
return X
def predict(self, X):
""" A reference implementation of a predicting function.
Parameters
----------
X : array-like of shape = [n_samples, n_features]
The input samples.
Returns
-------
y : array of shape = [n_samples]
Returns :math:`x^2` where :math:`x` is the first column of `X`.
"""
X = check_array(X)
return X[:, 0]**2
def transform(self, X):
""" A reference implementation of a transform function.
Parameters
----------
X : array-like of shape = [n_samples, n_features]
The input samples.
Returns
-------
X_transformed : array of int of shape = [n_samples, n_features]
The array containing the element-wise square roots of the values
in `X`.
"""
# Check that fit has been called
check_is_fitted(self, ['input_shape_'])
# Input validation
X = check_array(X)
# Check that the input is of the same shape as the one passed
# during fit.
if X.shape != self.input_shape_:
raise ValueError('Shape of input is different from what was seen '
'in `fit`')
return np.sqrt(X)
def fit(self, X, y=None):
"""
Parameters
----------
X : {array, sparse matrix}, shape (n_samples, n_features)
List of n_features-dimensional data points. Each row
corresponds to a single data point.
Returns
-------
self : object
Returns self.
"""
from simhash import compute
self._fit_X = X = check_array(X, accept_sparse='csr')
n_features = X.shape[1]
def _scale_hash_32_64bit(indices):
# (2**64 - 1) // (2**32 - 1) == 2**32 + 1, which replicates a 32-bit
# hash into both halves of a 64-bit word
return indices * ((2**64 - 1) // (2**32 - 1))
hash_func = self.hash_func
hashing_table = np.array(
[hash_func(el, 0) for el in range(n_features)],
dtype='uint64')
shash = []
for idx in range(X.shape[0]):
# get hashes of indices
mhash = hashing_table[X[idx].indices]
if self.hash_func_nbytes == 32:
mhash = _scale_hash_32_64bit(mhash)
shash.append(compute(mhash))
_fit_shash = np.asarray(shash, dtype='uint64')
self._fit_shash = _fit_shash
self._fit_shash_dict = {val: key
for key, val in enumerate(self._fit_shash)}
return self
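# A short check of the scaling identity used by _scale_hash_32_64bit above:
# (2**64 - 1) // (2**32 - 1) == 2**32 + 1, and multiplying a 32-bit value
# by it replicates the bit pattern into both halves of a 64-bit word.
factor = (2**64 - 1) // (2**32 - 1)
assert factor == 2**32 + 1
assert 0xDEADBEEF * factor == (0xDEADBEEF << 32) | 0xDEADBEEF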
def fit(self, X, y):
"""Fit the model using X as training data
Parameters
----------
X : {array-like, sparse matrix, BallTree, KDTree}
Training data, shape [n_samples, n_features],
"""
X = check_array(X, accept_sparse='csr')
y = np.asarray(y, dtype='int')
y_unique = np.unique(y)
index = np.arange(len(y), dtype='int')
if len(y_unique) == 0:
raise ValueError('The training set must have at least '
'one document category!')
# define nearest neighbors search objects for each category
self._mod = [NearestNeighbors(n_neighbors=1,
leaf_size=self.leaf_size,
algorithm=self.algorithm,
n_jobs=self.n_jobs,
# cosine metric (NearestNeighbors would default to minkowski/euclidean)
metric='cosine',
) for el in range(len(y_unique))]
index_mapping = []
for imod, y_val in enumerate(y_unique):
mask = (y == y_val)
index_mapping.append(index[mask])
self._mod[imod].fit(X[mask])
self.index_mapping = index_mapping
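# A hedged sketch of the per-category index construction above: one
# NearestNeighbors(n_neighbors=1) model is fitted per class, and
# index_mapping translates each model's local row ids back to global ids.
import numpy as np
from sklearn.neighbors import NearestNeighbors

X_demo = np.array([[0.0, 1.0], [1.0, 0.0], [0.9, 0.1]])
y_demo = np.array([0, 1, 1])
index = np.arange(len(y_demo))
mods, index_mapping = [], []
for y_val in np.unique(y_demo):
    mask = (y_demo == y_val)
    index_mapping.append(index[mask])
    mods.append(NearestNeighbors(n_neighbors=1,
                                 metric='cosine').fit(X_demo[mask]))
# mods[1] indexes only the two class-1 rows; index_mapping[1] == [1, 2]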
def kneighbors(self, X=None, batch_size=5000):
"""Finds the K-neighbors of a point.
Returns indices of and distances to the neighbors of each point.
Parameters
----------
X : array-like, shape (n_samples, n_features)
the input array
batch_size : int
the batch size
Returns
-------
S_cos : array [n_samples, n_categories]
the cosine similarity to closest point in each category
indices : array [n_samples, n_categories]
Indices of the nearest points in the population matrix.
"""
X = check_array(X, accept_sparse='csr')
n_classes = len(self._mod)
S_res = np.zeros((X.shape[0], n_classes), dtype='float')
nn_idx_res = np.zeros((X.shape[0], n_classes), dtype='int')
for imod in range(n_classes):
D_i, nn_idx_i_loc = _chunk_kneighbors(self._mod[imod].kneighbors,
X,
batch_size=batch_size)
# only the first neighbor is used (one column in the kneighbors output);
# convert the cosine distance returned by kneighbors into a cosine
# similarity
S_res[:, imod] = 1 - D_i[:, 0]
# map local index within index_mapping to global index
nn_idx_res[:, imod] = self.index_mapping[imod][nn_idx_i_loc[:, 0]]
return S_res, nn_idx_res
def fit(self, X, y=None):
"""Learn the document lenght and document frequency vector
(if necessary).
Parameters
----------
X : sparse matrix, [n_samples, n_features]
a matrix of term/token counts
"""
X = check_array(X, ['csr'], copy=self.copy)
scheme_t, scheme_d, scheme_n = _validate_smart_notation(self.weighting)
self.dl_ = _document_length(X)
if scheme_d in 'stp' or self.compute_df:
self.df_ = _document_frequency(X)
else:
self.df_ = None
if sp.isspmatrix_csr(X):
self.du_ = np.diff(X.indptr)
else:
self.du_ = X.shape[-1] - (X == 0).sum(axis=1)
self._n_features = X.shape[1]
if self.df_ is not None:
df_n_samples = len(self.dl_)
else:
df_n_samples = None
if scheme_n.endswith('p') and self.norm_pivot is None:
# Need to compute the pivot if it's not provided
_, self.norm_pivot = _smart_tfidf(X, self.weighting, self.df_,
df_n_samples,
norm_alpha=self.norm_alpha,
norm_pivot=self.norm_pivot,
return_pivot=True)
return self
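# A hedged sketch of the du_ computation above: for a CSR count matrix,
# np.diff(X.indptr) is the number of stored (nonzero) entries per row,
# i.e. the number of unique terms in each document.
import numpy as np
from scipy import sparse as sp

counts = sp.csr_matrix(np.array([[2, 0, 1], [0, 0, 3]]))
unique_terms = np.diff(counts.indptr)                 # [2, 1]
total_terms = np.asarray(counts.sum(axis=1)).ravel()  # [3, 3]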
def fit_transform(self, X, y=None):
"""Apply document term weighting and normalization on text features
Parameters
----------
X : sparse matrix, [n_samples, n_features]
a matrix of term/token counts
"""
X = check_array(X, ['csr'], copy=self.copy)
scheme_t, scheme_d, scheme_n = _validate_smart_notation(self.weighting)
self.dl_ = _document_length(X)
if scheme_d in 'stpd' or self.compute_df:
self.df_ = _document_frequency(X)
else:
self.df_ = None
if sp.isspmatrix_csr(X):
self.du_ = np.diff(X.indptr)
else:
self.du_ = X.shape[-1] - (X == 0).sum(axis=1)
self._n_features = X.shape[1]
if self.df_ is not None:
df_n_samples = len(self.dl_)
else:
df_n_samples = None
X, self.norm_pivot = _smart_tfidf(X, self.weighting, self.df_,
df_n_samples,
norm_alpha=self.norm_alpha,
norm_pivot=self.norm_pivot,
return_pivot=True)
return X
def transform(self, X, y=None):
"""Apply document term weighting and normalization on text features
Parameters
----------
X : sparse matrix, [n_samples, n_features]
a matrix of term/token counts
"""
X = check_array(X, ['csr'], copy=self.copy)
check_is_fitted(self, 'dl_', 'vector is not fitted')
if X.shape[1] != self._n_features:
raise ValueError(('Model fitted with n_features={} '
'but X.shape={}')
.format(self._n_features, X.shape))
if self.df_ is not None:
df_n_samples = len(self.dl_)
else:
df_n_samples = None
return _smart_tfidf(X, self.weighting, self.df_,
df_n_samples,
norm_alpha=self.norm_alpha,
norm_pivot=self.norm_pivot)
def check_array(self, X):
from sklearn.utils.validation import check_array
return check_array(X, allow_nd=True, estimator="GPR")
def _predict(self, X):
if not hasattr(self, "P_"):
raise NotFittedError("Estimator not fitted.")
X = check_array(X, accept_sparse='csc', dtype=np.double)
X = self._augment(X)
return self._get_output(X)
def _predict(self, X):
if not hasattr(self, "U_"):
raise NotFittedError("Estimator not fitted.")
X = check_array(X, accept_sparse='csc', dtype=np.double)
X = self._augment(X)
X = get_dataset(X, order='fortran')
return _lifted_predict(self.U_, X)
def check_feature_array(array, n_features=None):
array = check_array(array, ensure_2d=True, allow_nd=False)
if n_features is not None and array.shape[1] != n_features:
raise ValueError('feature array must have exactly %d features' % n_features)
return array
def check_multilabel_array(array, n_labels=None, force_binary=True):
array = check_array(array, ensure_2d=True, allow_nd=False, dtype=int)
if n_labels is not None and array.shape[1] != n_labels:
raise ValueError('multilabel array must have exactly %d labels' % n_labels)
if force_binary:
count_ones = np.count_nonzero(array == 1)
count_zeros = np.count_nonzero(array == 0)
if np.size(array) != count_ones + count_zeros:
raise ValueError('multilabel array must be binary')
return array
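# A usage sketch for the validator above (assumes check_array is imported
# from sklearn.utils.validation): a 0/1 indicator matrix passes, while any
# other value trips the count_nonzero-based binarity check.
import numpy as np

labels_ok = check_multilabel_array(np.array([[0, 1], [1, 1]]), n_labels=2)
try:
    check_multilabel_array(np.array([[0, 2], [1, 1]]))
except ValueError:
    pass  # 'multilabel array must be binary'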