def gridSearch(data, params, true_k):
    """Grid-search a TF-IDF + KMeans pipeline and print the best result.

    The pipeline has two steps, ``'vect'`` (``TfidfVectorizer``) and
    ``'clf'`` (``KMeans``), so grid keys address them as ``vect__<name>``
    and ``clf__<name>``.

    Args:
        data: Collection of documents handed unchanged to
            ``GridSearchCV.fit`` (expected to be raw text, since the
            vectorizer uses the ``'word'`` analyzer).
        params: Parameter grid for ``GridSearchCV`` (a dict, or a list of
            dicts).
        true_k: Number of clusters used by KMeans. ``None`` keeps the
            KMeans default. A ``clf__n_clusters`` entry in ``params``
            takes precedence, because the grid overrides the pipeline's
            initial settings.

    Returns:
        The fitted ``GridSearchCV`` instance, so callers can inspect
        ``best_estimator_``, ``cv_results_``, etc.
    """
    tfidf = TfidfVectorizer(strip_accents=None,
                            lowercase=True,
                            sublinear_tf=True,
                            analyzer='word')

    # ``n_jobs`` is deliberately not passed to KMeans: the keyword was
    # deprecated in scikit-learn 0.23 and removed in 1.0, where passing it
    # raises TypeError.
    kmeans_kwargs = {'init': 'k-means++', 'random_state': 0, 'verbose': 0}
    if true_k is not None:
        kmeans_kwargs['n_clusters'] = true_k

    lr_tfidf = Pipeline([('vect', tfidf),
                         ('clf', KMeans(**kmeans_kwargs))])

    # No ``scoring`` is supplied, so the search ranks candidates with the
    # pipeline's own ``score`` method.
    gsTfIdf = GridSearchCV(lr_tfidf, params, n_jobs=1, verbose=1)
    gsTfIdf.fit(data)

    print()
    print("Best score: %0.3f" % gsTfIdf.best_score_)
    print("Best parameters set:")
    # ``best_params_`` holds exactly the searched parameters, and also works
    # when ``params`` is a list of grids (which has no ``.keys()``).
    best_parameters = gsTfIdf.best_params_
    for param_name in sorted(best_parameters):
        print("\t%s: %r" % (param_name, best_parameters[param_name]))

    return gsTfIdf
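# Comment list (scraped page navigation text; not part of the program)
# Article table of contents (scraped page navigation text; not part of the program)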