def rf_from_cfg(cfg, seed):
    """
    Build a random forest regressor from a configuration and cross-validate it.

    This is the target function being optimized. The hyperparameters are read
    from the configuration (cfg), and the resulting model is scored with
    cross-validation (cv=11) on the module-level ``boston`` object, using
    ``boston.data`` as features and ``boston.target`` as labels.

    Parameters:
    -----------
    cfg: Configuration
        configuration chosen by smac; it is indexed with the keys
        "num_trees", "criterion", "min_samples_to_split",
        "min_samples_in_leaf", "min_weight_frac_leaf", "max_features",
        "max_leaf_nodes" and "do_bootstrapping"
    seed: int or RandomState
        passed to the random forest as its ``random_state``

    Returns:
    -----------
    float
        mean over the cv-folds of the root mean square error of the
        random forest's test predictions
    """
    def root_mean_square_error(y, y_pred):
        residuals = y_pred - y
        return np.sqrt(np.mean(residuals ** 2))

    # RandomForestRegressor argument name -> key holding its value in cfg.
    cfg_key_for = {
        "n_estimators": "num_trees",
        "criterion": "criterion",
        "min_samples_split": "min_samples_to_split",
        "min_samples_leaf": "min_samples_in_leaf",
        "min_weight_fraction_leaf": "min_weight_frac_leaf",
        "max_features": "max_features",
        "max_leaf_nodes": "max_leaf_nodes",
        "bootstrap": "do_bootstrapping",
    }
    hyperparameters = {arg: cfg[key] for arg, key in cfg_key_for.items()}
    forest = RandomForestRegressor(random_state=seed, **hyperparameters)

    # RMSE is a loss, so the scorer is declared with greater_is_better=False;
    # the scores coming back from cross-validation are therefore negated.
    neg_rmse_scorer = make_scorer(root_mean_square_error, greater_is_better=False)
    fold_scores = cross_val_score(
        forest,
        boston.data,
        boston.target,
        cv=11,
        scoring=neg_rmse_scorer,
    )

    # Undo the scorer's sign flip so the caller receives the mean RMSE.
    return -np.mean(fold_scores)
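# Comment list (leftover web-page navigation text)
# Article table of contents (leftover web-page navigation text)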