def test_verify_features_finds_no_missing_features_when_none_are_missing():
    """Check that a reloaded model reports no feature mismatches on the test set.

    Trains a classifier on the Titanic training frame with
    ``verify_features=True``, saves it to disk, reloads the pipeline with
    ``dill`` and calls ``verify_features`` on its ``'final_model'`` step with
    the Titanic test frame. The test passes when both the
    ``'prediction_not_training'`` and ``'training_not_prediction'`` entries of
    the result are empty.
    """
    # Seeds NumPy's global RNG only; the stdlib ``random`` call used for the
    # file name below is not affected by this.
    np.random.seed(0)

    df_titanic_train, df_titanic_test = utils.get_titanic_binary_classification_dataset()

    column_descriptions = {
        'survived': 'output',
        'embarked': 'categorical',
        'pclass': 'categorical',
        'sex': 'categorical',
    }

    ml_predictor = Predictor(type_of_estimator='classifier', column_descriptions=column_descriptions)
    ml_predictor.train(df_titanic_train, verify_features=True)

    # A random name is passed to save(); presumably this keeps concurrent or
    # repeated test runs from clobbering each other's files.
    file_name = ml_predictor.save(str(random.random()))

    try:
        with open(file_name, 'rb') as read_file:
            saved_ml_pipeline = dill.load(read_file)
    finally:
        # Clean up even when loading fails, so a broken run does not leave the
        # saved model behind. The existence check avoids masking the original
        # error if the file was never written.
        if os.path.exists(file_name):
            os.remove(file_name)

    missing_features = saved_ml_pipeline.named_steps['final_model'].verify_features(df_titanic_test)

    # Diagnostic output, shown by the test runner when the test fails.
    print('missing_features')
    print(missing_features)

    print("len(missing_features['prediction_not_training'])")
    print(len(missing_features['prediction_not_training']))
    print("len(missing_features['training_not_prediction'])")
    print(len(missing_features['training_not_prediction']))

    assert len(missing_features['prediction_not_training']) == 0
    assert len(missing_features['training_not_prediction']) == 0
评论列表
文章目录