def setup_training_columns(self):
    """Return the list of column names to use for training.

    The configured "training_columns" entry for the current dataset choice
    is returned as-is when it is non-empty, or when no dataset is loaded
    (``self.df_listings`` is None).

    When "training_columns" is empty and a dataset is loaded, the list is
    derived from the dataset's columns, dropping:

    * the "target_column" (it may not be present yet, e.g. if it has not
      been generated by Clustering),
    * any column whose name exactly matches an entry of
      ``EXCLUDE_TRAINING_COLUMNS_WITH_FULL_TEXT``,
    * any column whose name contains an entry of
      ``EXCLUDE_TRAINING_COLUMNS_WITH_PARTIAL_TEXT`` as a substring.

    Returns:
        A list of column names, in the dataset's original column order.
    """
    config = self.prediction_config
    training_columns = config.DATASET_LOCATION[self.dataset_choice]["training_columns"]

    # Explicit configuration wins; without a dataset there is nothing to derive from.
    if training_columns or self.df_listings is None:
        return training_columns

    excluded_full_names = config.EXCLUDE_TRAINING_COLUMNS_WITH_FULL_TEXT
    excluded_partial_names = config.EXCLUDE_TRAINING_COLUMNS_WITH_PARTIAL_TEXT

    # A column is retained only if it matches NONE of the partial patterns.
    # (Previously a column was retained as soon as a single pattern failed to
    # match, and an empty pattern list discarded every column.)
    return [
        column_name
        for column_name in self.df_listings.columns.tolist()
        if column_name != self.target_column
        and column_name not in excluded_full_names
        and not any(partial in column_name for partial in excluded_partial_names)
    ]
评论列表
文章目录