def get_additional_features(train,test,magic=False,ID=False):
    """Append decomposition / random-projection features to train and test.

    Five reducers (truncated SVD, PCA, FastICA, Gaussian random projection
    and sparse random projection) are each fitted on ``train`` and applied
    to ``test``, using the columns of ``test`` as the feature set. Every
    reducer produces 12 components, stored as ``<prefix>_<k>`` columns with
    ``k`` running from 1 to 12 and prefixes ``tsvd``, ``pca``, ``ica``,
    ``grp`` and ``srp``.

    Args:
        train: Training DataFrame. Must contain every column of ``test``;
            must also contain ``X0`` and ``y`` when ``magic`` is true.
        test: Test DataFrame. Its columns define the feature set.
        magic: When true, add a ``magic`` column holding the mean of ``y``
            for the row's ``X0`` value, computed on ``train``.
        ID: When false, the ``ID`` column (if there is one) is left out of
            the reducer inputs. When true, it is used like any other column.

    Returns:
        A ``(train, test)`` tuple with the new columns.

    Note:
        The component columns are assigned onto the DataFrames that were
        passed in, so the caller's objects are modified. With ``magic``
        enabled the returned frames are the new objects produced by
        ``merge`` rather than the inputs themselves.
    """
    col = list(test.columns)
    # Guard the removal: list.remove raises ValueError on a missing item,
    # which would make the function unusable on frames without an ID column.
    if not ID and 'ID' in col:
        col.remove('ID')

    n_comp = 12
    # Every reducer gets the same fixed seed so repeated runs are reproducible.
    reducers = [
        ('tsvd', TruncatedSVD(n_components=n_comp, random_state=420)),
        ('pca', PCA(n_components=n_comp, random_state=420)),
        ('ica', FastICA(n_components=n_comp, random_state=420)),
        ('grp', GaussianRandomProjection(n_components=n_comp, eps=0.1, random_state=420)),
        ('srp', SparseRandomProjection(n_components=n_comp, dense_output=True, random_state=420)),
    ]

    # Fit on train only and reuse the fitted reducer for test. All
    # projections are computed before any column is added to the frames.
    projections = []
    for prefix, reducer in reducers:
        train_proj = reducer.fit_transform(train[col])
        test_proj = reducer.transform(test[col])
        projections.append((prefix, train_proj, test_proj))

    # Component-major order keeps the original column layout:
    # tsvd_1, pca_1, ica_1, grp_1, srp_1, tsvd_2, ...
    for i in range(1, n_comp + 1):
        for prefix, train_proj, test_proj in projections:
            name = prefix + '_' + str(i)
            train[name] = train_proj[:, i - 1]
            test[name] = test_proj[:, i - 1]

    if magic:
        # Mean target per X0 level, learned from the training rows.
        group_means = train.groupby(['X0'])['y'].mean()
        magic_mat = pd.DataFrame({'X0':group_means.index,'magic':list(group_means)})
        # Fallback for X0 levels that occur only in test: the mean of the
        # per-level means (not the overall mean of y).
        mean_magic = magic_mat['magic'].mean()
        train = train.merge(magic_mat,on='X0',how='left')
        test = test.merge(magic_mat,on='X0',how='left')
        test['magic'] = test['magic'].fillna(mean_magic)
    return train,test
## Preparing stacking functions. Each one takes the out-of-bag values as input.
## xgb will not be used in this case, but it is still posted here.
Jason_Liu_stack_res.py 文件源码
python
阅读 24
收藏 0
点赞 0
评论 0
评论列表
文章目录