def one_hot_encoder(df, estimated_var):
    """One-hot encode ``df[estimated_var]`` into new columns of ``df`` (in place).

    One column named ``<estimated_var>_<label>`` is added per distinct
    non-null label of ``df[estimated_var]``.  Rows where the label is known
    receive the integers 0/1; rows where it is null receive NaN, so the new
    columns have object dtype whenever they are created.

    The encoding is computed from the numeric column
    ``df[<estimated_var> + 'Num']``: the j-th smallest distinct code becomes
    the j-th indicator column, and it is paired with the j-th label in order
    of first appearance.
    NOTE(review): this pairing is only meaningful if the ``...Num`` column
    holds the ``factorize()`` codes of ``estimated_var`` -- confirm against
    the caller that builds that column.

    Args:
        df: DataFrame containing ``estimated_var`` and ``estimated_var + 'Num'``.
            It is modified in place.
        estimated_var: Name of the categorical column to encode.

    Returns:
        The same ``df`` object, with the indicator columns added.

    Raises:
        KeyError: If either required column is missing.
        ValueError: If the number of distinct codes among the non-null rows
            differs from the number of distinct labels.
    """
    source = df[estimated_var]

    # factorize() does not mutate its input, so no defensive copy is needed.
    label_classes = source.factorize()[1]
    # str.format (rather than ''.join) so non-string labels also work.
    new_one_hot_encoded_features = [
        '{}_{}'.format(estimated_var, label) for label in label_classes
    ]

    known = np.asarray(~source.isnull(), dtype=bool)
    codes = np.asarray(df['{}Num'.format(estimated_var)])[known]

    # Sorted distinct codes define the indicator columns.
    distinct_codes, positions = np.unique(codes, return_inverse=True)
    if len(distinct_codes) != len(new_one_hot_encoded_features):
        raise ValueError(
            "column '{0}Num' has {1} distinct value(s) among non-null rows "
            "but '{0}' has {2} distinct label(s)".format(
                estimated_var, len(distinct_codes),
                len(new_one_hot_encoded_features)))
    feature_var_values = (
        positions.reshape(-1, 1) == np.arange(len(distinct_codes))
    ).astype(int)

    # Create new feature_var columns with one-hot encoded values.  Each column
    # is built as a fresh object array (NaN where the label is unknown) instead
    # of overwriting a copy of the string column, which avoids relying on
    # implicit dtype upcasting and on tuple-style ``.loc`` column indexing.
    for position, feature in enumerate(new_one_hot_encoded_features):
        column = np.full(len(df), np.nan, dtype=object)
        column[known] = feature_var_values[:, position]
        df[feature] = column
    return df
# (Scraped page navigation text, not part of the code: "Comment list", "Table of contents")