def cvectorize(f, c, n):
    r"""Use the Count Vectorizer and TF-IDF Transformer.

    The text column is converted into character n-gram counts, which
    are then re-weighted with TF-IDF.

    Parameters
    ----------
    f : pandas.DataFrame
        Dataframe containing the column ``c``.
    c : str
        Name of the text column in the dataframe ``f``.
    n : int
        The maximum n-gram length; character n-grams of length
        1 through ``n`` are extracted.

    Returns
    -------
    new_features : numpy.ndarray
        The TF-IDF weighted features, converted to a dense array.

    Notes
    -----
    Missing values in the column are replaced with ``BSEP`` on a new
    series; the dataframe ``f`` itself is not modified.

    References
    ----------
    To use count vectorization and TF-IDF, you can find more
    information here [TFE]_.

    .. [TFE] http://scikit-learn.org/stable/modules/feature_extraction.html#text-feature-extraction

    """
    # Non-inplace fillna returns a new Series, so the caller's frame is
    # never mutated through a chained selection.
    fc = f[c].fillna(BSEP)
    # ngram_range is documented as a tuple; scikit-learn's parameter
    # validation rejects a list.
    cvect = CountVectorizer(ngram_range=(1, n), analyzer='char')
    cfeat = cvect.fit_transform(fc)
    tfidf_transformer = TfidfTransformer()
    # Densify the sparse TF-IDF result before handing it back.
    new_features = tfidf_transformer.fit_transform(cfeat).toarray()
    return new_features
#
# Function apply_treatment
#
# NOTE: web-page navigation residue, not part of the module:
#   "Comment list" / "Table of contents"