def feature_scaling(self, df):
    """Return a copy of ``df`` whose numerical feature columns are standardized.

    The columns to scale are taken from
    ``TwoSigmaFinModTools._numerical_feature_names``; when
    ``TwoSigmaFinModTools._is_one_hot_encoder`` is falsy they are extended
    with ``TwoSigmaFinModTools._feature_names_num``.  The ``'id'`` and
    ``'y'`` columns are never scaled.

    :param df: input frame; it is copied, the caller's object is not
        modified by this method itself.
    :return: the copied frame with the relevant columns replaced by their
        standardized values.
    """
    df = df.copy()

    feature_names = TwoSigmaFinModTools._numerical_feature_names
    if not TwoSigmaFinModTools._is_one_hot_encoder:
        feature_names = np.concatenate(
            [TwoSigmaFinModTools._feature_names_num.values, feature_names.values])

    # The target is handed to the scaler when it is available, exactly as
    # before; otherwise None is passed, which is the scaler's default.
    target = df['y'].values if 'y' in df.columns else None

    # Neither the identifier nor the target column takes part in scaling.
    # Filtering out 'y' is a no-op when the frame has no such column.
    candidate_columns = df[feature_names].columns
    relevant_features = candidate_columns[
        (candidate_columns != 'y') & (candidate_columns != 'id')]

    # True where a value is present, False where it is missing.
    is_present = ~df[relevant_features].isnull()

    # Standardization (centering and scaling) of dataset that removes mean
    # and scales to unit variance
    standard_scaler = StandardScaler()
    res = standard_scaler.fit_transform(
        X=df[relevant_features][is_present].values, y=target)

    # Only go through the helper when there really are missing entries.
    # The previous no-'y' branch tested ``mask.sum().sum() > 0`` (any value
    # present) instead of ``(~mask).sum().sum() > 0`` (any value missing),
    # which disagreed with the 'y' branch.
    # NOTE(review): standardize_relevant_features is defined elsewhere;
    # presumably it writes ``res`` back while respecting missing entries -
    # confirm against its definition.
    if (~is_present).sum().sum() > 0:
        df = self.standardize_relevant_features(df, relevant_features, res)
    else:
        df.loc[:, tuple(relevant_features)] = res
    return df
two_sigma_financial_modelling.py 文件源码
python
阅读 21
收藏 0
点赞 0
评论 0
评论列表
文章目录