def get_pipeline(clf=None):
    """Build a pipeline that vectorizes feature dicts and feeds a classifier.

    The pipeline has two steps: a ``DictVectorizer(sparse=False)`` followed
    by ``clf``.

    Args:
        clf: Estimator to use as the final pipeline step. When ``None``
            (the default), a new
            ``RandomForestClassifier(n_estimators=100, class_weight="balanced")``
            is created for this call. An explicitly passed estimator is
            used as-is (it is not copied).

    Returns:
        The pipeline object produced by ``make_pipeline``.
    """
    # Build the default estimator per call rather than in the signature:
    # a default in the signature is evaluated once at definition time, so
    # every pipeline created without an explicit ``clf`` would share (and
    # re-fit) the very same classifier instance.
    if clf is None:
        clf = RandomForestClassifier(n_estimators=100, class_weight="balanced")
    return make_pipeline(DictVectorizer(sparse=False), clf)