def __init__(self, ngram_range=(1, 1), analyzer='word', count=True,
             n_features=200):
    """Set up the n-gram vectorizer used by the classifier.

    Args:
        ngram_range (tuple): Pair of ints giving the range of n-gram sizes;
            forwarded unchanged to the vectorizer.
        analyzer (string): Unit of language the n-grams are built from.
            'word' treats each word as a unit, 'char' treats each
            character as a unit.
        count (boolean): Selects the vectorizer type. A truthy value
            creates a ``CountVectorizer``; a falsy value creates a
            ``HashingVectorizer``.
        n_features (int): Maximum number of features used. Passed as
            ``max_features`` to ``CountVectorizer`` or as ``n_features``
            to ``HashingVectorizer``.
    """
    # Tokenisation settings are the same whichever vectorizer is chosen.
    shared_kwargs = {'analyzer': analyzer, 'ngram_range': ngram_range}

    if not count:
        # NOTE(review): the earlier docstring described this branch as
        # producing binary presence features, yet no ``binary`` option is
        # passed here -- confirm the intended encoding with the callers.
        self.vectorizer = HashingVectorizer(n_features=n_features,
                                            **shared_kwargs)
    else:
        self.vectorizer = CountVectorizer(max_features=n_features,
                                          **shared_kwargs)
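# Comment list (web-page navigation residue, not part of the code)
# Article table of contents (web-page navigation residue, not part of the code)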