def getTFV(token_pattern=token_pattern,
           norm=tfidf__norm,
           max_df=tfidf__max_df,
           min_df=tfidf__min_df,
           ngram_range=(1, 1),
           vocabulary=None,
           stop_words='english'):
    """Create a new, unfitted word-level ``TfidfVectorizer``.

    The defaults for ``token_pattern``, ``norm``, ``max_df`` and ``min_df``
    come from the module-level names ``token_pattern``, ``tfidf__norm``,
    ``tfidf__max_df`` and ``tfidf__min_df``; they are bound once, when this
    function is defined.

    Args:
        token_pattern: Forwarded as ``token_pattern``.
        norm: Forwarded as ``norm``.
        max_df: Forwarded as ``max_df``.
        min_df: Forwarded as ``min_df``.
        ngram_range: Forwarded as ``ngram_range``; defaults to ``(1, 1)``.
        vocabulary: Forwarded as ``vocabulary``; defaults to ``None``.
        stop_words: Forwarded as ``stop_words``; defaults to ``'english'``.

    Returns:
        The constructed ``TfidfVectorizer`` instance (not fitted here).
    """
    return TfidfVectorizer(
        # Settings supplied by the caller (or the defaults above).
        token_pattern=token_pattern,
        norm=norm,
        max_df=max_df,
        min_df=min_df,
        ngram_range=ngram_range,
        vocabulary=vocabulary,
        stop_words=stop_words,
        # Settings this factory always fixes.
        analyzer='word',
        strip_accents='unicode',
        max_features=None,
        use_idf=True,
        smooth_idf=True,
        sublinear_tf=True,
    )
#========= CountVectorizer =========#
评论列表
文章目录