def fit(self, X, y=None):
    """Fit a ``DictVectorizer`` on the rows of ``X`` and keep it on ``self.dv``.

    Args:
        X: Table-like object exposing ``to_dict('records')``
            (presumably a pandas DataFrame -- confirm against callers).
            All columns are assumed to hold strings; this is not checked.
        y: Unused by this method.

    Returns:
        This instance, so calls can be chained.
    """
    # assumes all columns of X are strings
    row_records = X.to_dict('records')

    # Attach the vectorizer to the instance first, then fit it in place on
    # the per-row records.
    self.dv = DictVectorizer(sparse=False)
    self.dv.fit(row_records)

    return self