def __init__(self, estimator, dtype=float, sparse=True):
    """
    Store the estimator and create the label encoder and feature vectorizer.

    :param estimator: scikit-learn classifier object.

    :param dtype: data type used when building feature array.
        scikit-learn estimators work exclusively on numeric data. The
        default value should be fine for almost all situations.

    :param sparse: Whether to use sparse matrices internally.
        The estimator must support these; not all scikit-learn classifiers
        do (see their respective documentation and look for "sparse
        matrix"). The default value is True, since most NLP problems
        involve sparse feature sets. Setting this to False may take a
        great amount of memory.
    :type sparse: bool
    """
    self._clf = estimator
    # dtype and sparse are forwarded unchanged to the vectorizer; the
    # encoder is constructed with no arguments.
    self._encoder = LabelEncoder()
    self._vectorizer = DictVectorizer(dtype=dtype, sparse=sparse)
# Web-page navigation residue (not part of the code): "Comment list", "Article table of contents".