two_sigma_financial_modelling.py 文件源码

python
阅读 28 收藏 0 点赞 0 评论 0

项目:PortfolioTimeSeriesAnalysis 作者: MizioAnd 项目源码 文件源码
def feature_engineering(self, df, is_skewness_correction_for_all_features=True):
        """Apply skewness handling to ``df`` in place.

        Two mutually exclusive strategies are available:

        * Default (``is_skewness_correction_for_all_features`` truthy): collect
          the numerical feature names registered on ``TwoSigmaFinModTools``,
          drop ``'Id'``, and hand the remaining column labels to
          ``self.skew_correction``.
        * Otherwise: only the ``'Call Outcome'`` column, when present, is
          replaced by ``np.log1p`` of itself.

        Args:
            df: pandas DataFrame to process. In the default strategy it must
                contain every registered numerical feature name, otherwise the
                column selection raises ``KeyError``.
            is_skewness_correction_for_all_features: Selects the strategy. It
                used to be a constant hard-coded to ``1`` inside the body,
                which made the ``'Call Outcome'`` strategy unreachable; the
                default keeps the previous behaviour.

        Returns:
            None.

        Side effects:
            Sets ``self.is_with_log1p_call_outcome = 1`` when ``df`` has a
            ``'y'`` column (default strategy) or a ``'Call Outcome'`` column
            (alternative strategy).
        """
        if not is_skewness_correction_for_all_features:
            # Only 'Call Outcome' is scaled down here; every other column is
            # left untouched.
            # NOTE(review): presumably because the other numerical features
            # are already standardized upstream -- confirm.
            if 'Call Outcome' in df.columns:
                self.is_with_log1p_call_outcome = 1
                # Plain column assignment: a tuple key inside ``.loc`` can be
                # read as a single label rather than a list of labels.
                df['Call Outcome'] = np.log1p(df['Call Outcome'])
            return

        # Correcting for skewness.
        if 'y' in df.columns:
            self.is_with_log1p_call_outcome = 1

        numerical_feature_names = TwoSigmaFinModTools._numerical_feature_names.values
        if not TwoSigmaFinModTools._is_one_hot_encoder:
            # Without one-hot encoding the names in ``_feature_names_num`` are
            # prepended to the numerical feature names.
            numerical_feature_names = np.concatenate(
                [TwoSigmaFinModTools._feature_names_num.values, numerical_feature_names])

        # Select once; the selection also validates that every name exists in df.
        candidate_columns = df[numerical_feature_names].columns
        relevant_features = candidate_columns[candidate_columns != 'Id']
        self.skew_correction(df, relevant_features)
评论列表
文章目录


问题


面经


文章

微信
公众号

扫码关注公众号