def feature_engineering(self, df, *, is_skewness_correction_for_all_features=True):
    """Skew-correct the numerical features of ``df``, or only log-scale 'Call Outcome'.

    Args:
        df: pandas DataFrame to process. In the 'Call Outcome'-only mode the
            column is overwritten in place on ``df``.
        is_skewness_correction_for_all_features: when truthy (the default, and
            the only behaviour available before this flag became a parameter),
            the numerical feature columns except 'Id' are handed to
            ``self.skew_correction``. When falsy, only the 'Call Outcome'
            column (if present) is replaced by ``np.log1p`` of itself.

    Side effects:
        Sets ``self.is_with_log1p_call_outcome = 1`` when ``df`` has a 'y'
        column (skew-correction mode) or a 'Call Outcome' column (other mode).

    Returns:
        None.
        NOTE(review): callers that expect the frame back should confirm
        against the full file -- no return statement is visible here.
    """
    if not is_skewness_correction_for_all_features:
        # Only scale down 'Call Outcome'; every other numerical feature is left
        # untouched (the original comment describes them as standardized).
        if 'Call Outcome' in df.columns:
            self.is_with_log1p_call_outcome = 1
            df['Call Outcome'] = np.log1p(df['Call Outcome'])
        return

    # Correcting for skewness: treat all numerical variables that were not
    # one-hot encoded.
    if 'y' in df.columns:
        self.is_with_log1p_call_outcome = 1

    # `.values` is called on these class attributes, so they are presumably
    # pandas Index/Series objects -- TODO confirm against the class definition.
    numerical_feature_names = TwoSigmaFinModTools._numerical_feature_names
    if TwoSigmaFinModTools._is_one_hot_encoder:
        numerical_feature_names = numerical_feature_names.values
    else:
        numerical_feature_names = np.concatenate(
            [TwoSigmaFinModTools._feature_names_num.values, numerical_feature_names.values])

    # Selecting through df raises KeyError if a listed feature is missing;
    # the 'Id' column is excluded from the correction.
    selected_columns = df[numerical_feature_names].columns
    relevant_features = selected_columns[selected_columns != 'Id']
    self.skew_correction(df, relevant_features)
two_sigma_financial_modelling.py 文件源码
python
阅读 28
收藏 0
点赞 0
评论 0
评论列表
文章目录