def grid_search(estimator, data, featTypes=('BoW',), nFolds=10, random_seed=44, param_grid=()):
    """Grid-search ``param_grid`` for ``estimator`` and return the best parameters.

    Each item's feature groups are flattened into a single dict, vectorized
    with ``DictVectorizer``, and the resulting matrix is searched with
    ``GridSearchCV`` over shuffled stratified folds, scored by ``"f1"``.

    Args:
        estimator: Estimator object handed to ``GridSearchCV``.
        data: Re-iterable collection of items (it is traversed twice). Each
            item must expose ``severity`` (used as the label) and ``feats``,
            a mapping of feature-group name to a ``{key: value}`` mapping.
            NOTE(review): ``feats`` is read only after
            ``generatePrimaryFeats`` has run; presumably that call populates
            it -- confirm against its definition.
        featTypes: Forwarded unchanged to ``generatePrimaryFeats``.
        nFolds: Number of folds passed as ``n_folds`` to ``StratifiedKFold``.
        random_seed: ``random_state`` for the fold shuffling.
        param_grid: Forwarded unchanged to ``GridSearchCV``.

    Returns:
        The ``best_params_`` attribute of the fitted grid search. The same
        value is also printed to standard output.
    """
    # Labels are collected before feature generation, in the original order.
    labels = [item.severity for item in data]
    generatePrimaryFeats(data, featTypes)

    featurized = []
    for item in data:
        flat = {}
        for featname, values in item.feats.items():
            for key, value in values.items():
                # Prefix every key with its feature-group name so that two
                # groups sharing a key (e.g. a concept feature and a
                # bag-of-words token) do not overwrite each other.
                flat["{0}-{1}".format(featname, key)] = value
        featurized.append(flat)

    vectorizer = DictVectorizer()
    x_train = vectorizer.fit_transform(featurized)

    folds = cross_validation.StratifiedKFold(
        labels,
        n_folds=nFolds,
        shuffle=True,
        random_state=random_seed,
    )
    search = GridSearchCV(
        estimator,
        param_grid=param_grid,
        scoring="f1",
        n_jobs=-1,
        cv=folds,
    )

    best_params = search.fit(x_train, labels).best_params_
    print(best_params)
    return best_params
评论列表
文章目录