def __do_label_encoding(self):
    """Label-encode the configured feature columns in train and both test sets.

    For every column name found among the values of
    ``self.__get_label_encode_dict()``, the encoder is fitted on that
    column's values from the training frame and the two test frames stacked
    together, so all three frames share a single label-to-code mapping.
    Each frame's column is then overwritten with its encoded values.

    Returns:
        None. The frames obtained from ``self.res_data_dict`` are modified
        in place.
    """
    # The train entry is unpacked as a pair; only its first element is used.
    df_train, _ = self.res_data_dict[g_singletonDataFilePath.getTrainDir()]
    # The test entries are used directly as frames.
    df_testset1 = self.res_data_dict[g_singletonDataFilePath.getTest1Dir()]
    df_testset2 = self.res_data_dict[g_singletonDataFilePath.getTest2Dir()]
    frames = (df_train, df_testset1, df_testset2)

    le = LabelEncoder()
    # Only the dict's values are needed (presumably column names -- confirm
    # against __get_label_encode_dict). ``.values()`` exists on both Python 2
    # and Python 3, unlike ``iteritems()``, which is Python 2 only.
    for new_feature_name in self.__get_label_encode_dict().values():
        # Fit on the union of all three frames so that a label seen only in
        # a test set is still known to the encoder when it is transformed.
        stacked = pd.concat([frame[new_feature_name] for frame in frames], axis=0)
        le.fit(stacked)
        for frame in frames:
            frame[new_feature_name] = le.transform(frame[new_feature_name])
评论列表
文章目录