def feature_mapping_to_numerical_values(self, df):
    """Encode every non-numerical feature column of *df* into numeric form, in place.

    For each name in ``TwoSigmaFinModTools._non_numerical_feature_names`` a
    companion name ``<feature>Num`` is recorded, label encoding is applied via
    ``TwoSigmaFinModTools.encode_labels_in_numeric_format``, and — when the
    class-level one-hot flag has been switched on by that call — one-hot
    columns are appended as well. The resulting list of numeric feature names
    is stored on the class as ``TwoSigmaFinModTools._feature_names_num``.

    Side effects: mutates *df*, ``TwoSigmaFinModTools`` class state, and (for
    test data) ``self.df_test_all_feature_var_names``.
    """
    TwoSigmaFinModTools._is_one_hot_encoder = 0
    # Mask of non-null cells; only consulted in the LabelBinarizer debug path below.
    mask = ~df.isnull()
    # The training set is assumed to contain every possible feature_var_name;
    # features present only in test data could not be part of a trained model
    # anyway. Test data is detected by the absence of the target column 'y',
    # and its full column set is published for later reconciliation.
    if not any(tuple(df.columns == 'y')):
        self.df_test_all_feature_var_names = df.columns
    _feature_names_num = np.zeros(
        (TwoSigmaFinModTools._non_numerical_feature_names.shape[0],), dtype=object)
    for idx, feature_name in enumerate(TwoSigmaFinModTools._non_numerical_feature_names):
        # Record the numeric companion name '<feature>Num'.
        _feature_names_num[idx] = ''.join([feature_name, 'Num'])
        TwoSigmaFinModTools.encode_labels_in_numeric_format(df, feature_name)
        # NOTE(review): the flag was cleared above, so this branch only runs if
        # encode_labels_in_numeric_format set it — confirm against that method.
        if TwoSigmaFinModTools._is_one_hot_encoder:
            is_with_label_binarizer = 0
            if is_with_label_binarizer:
                # Alternative (currently disabled) path using sklearn's LabelBinarizer.
                mapper_df = DataFrameMapper([(feature_name, LabelBinarizer())], df_out=True)
                feature_var_values = mapper_df.fit_transform(df.copy())
                print(df[feature_name].isnull().sum().sum())
                print(df[feature_name][mask[feature_name]].isnull().sum().sum())
                for column in feature_var_values.columns:
                    df[column] = feature_var_values[column]
            else:
                TwoSigmaFinModTools.one_hot_encoder(df, feature_name)
    TwoSigmaFinModTools._feature_names_num = pd.Series(data=_feature_names_num, dtype=object)
# Source file: two_sigma_financial_modelling.py
# (Blog-page chrome — view/like/comment counters and navigation captions —
# removed; it was scrape residue, not part of the program.)