def ensemble_classification(self, scoring_metric='roc_auc', trained_model_by_name=None):
    """
    Score a set of trained classifiers and return the one with the highest score.

    When no models are supplied, three default candidates are trained first (KNN, logistic regression and a
    200-tree random forest, each with randomized search enabled). Every candidate is then unwrapped to its
    estimator, scored through `self.metrics`, logged, and ranked by the value stored under `scoring_metric`.

    Args:
        scoring_metric (str): Key of the metric used to rank the models. Defaults to 'roc_auc'.
        trained_model_by_name (dict): Optional mapping of display name -> trained model, used to compare a
            custom set of models instead of the defaults.

    Returns:
        TrainedSupervisedModel: The entry of the candidate mapping whose score is highest. If several
        candidates share the highest score, the one that comes last in the mapping's iteration order wins.

    Raises:
        IndexError: If an empty mapping is passed explicitly, because there is no candidate to select.
    """
    self.validate_classification('Ensemble Classification')
    self.validate_score_metric_for_number_of_classes(scoring_metric)

    candidates = trained_model_by_name
    if candidates is None:
        # Default list of algorithms to try for the ensemble.
        # Adding an ensemble method is as easy as adding one more name -> trained model entry below.
        # TODO because these now all return TSMs it will be additionally slow by all the factor models.
        # TODO Could these be trained separately then after the best is found, train the factor model and add to TSM?
        candidates = {}
        candidates['KNN'] = self.knn(randomized_search=True, scoring_metric=scoring_metric)
        # NOTE(review): unlike the other two candidates, this call does not forward scoring_metric -
        # confirm whether that is intentional.
        candidates['Logistic Regression'] = self.logistic_regression(randomized_search=True)
        candidates['Random Forest Classifier'] = self.random_forest_classifier(
            trees=200,
            randomized_search=True,
            scoring_metric=scoring_metric)

    ranking_value_by_algorithm = {}
    for algorithm_name, trained_model in candidates.items():
        # Unroll the estimator from the trained supervised model, then compute its score object
        unwrapped_estimator = hcai_tsm.get_estimator_from_trained_supervised_model(trained_model)
        all_scores = self.metrics(unwrapped_estimator)
        self._console_log('{} algorithm: score = {}'.format(algorithm_name, all_scores))

        # TODO this may need to ferret out each classification score separately
        ranking_value_by_algorithm[algorithm_name] = all_scores[scoring_metric]

    # Ascending stable sort: the winner is the final element
    ranking = list(ranking_value_by_algorithm.items())
    ranking.sort(key=lambda name_and_value: name_and_value[1])
    winner_name, winner_value = ranking[-1]
    winner = candidates[winner_name]

    self._console_log('Based on the scoring metric {}, the best algorithm found is: {}'.format(scoring_metric,
                                                                                               winner_name))
    self._console_log('{} {} = {}'.format(winner_name, scoring_metric, winner_value))

    return winner
advanced_supvervised_model_trainer.py 文件源码
python
阅读 22
收藏 0
点赞 0
评论 0
评论列表
文章目录