def get_feature_importance(feature, y_train=None,
                           train_path='../data/train.csv'):
    """Return the Spearman rank correlation between a feature and the labels.

    Args:
        feature: Feature values to correlate with the labels.
            NOTE(review): presumably one value per training row, aligned
            with the label column -- confirm against callers.
        y_train: Optional labels to correlate against. When omitted, the
            ``is_duplicate`` column is loaded from ``train_path``, which is
            the original behaviour. Passing labels explicitly avoids
            re-reading the CSV on every call.
        train_path: CSV file containing an ``is_duplicate`` column. Only
            used when ``y_train`` is None.

    Returns:
        The first element of the ``scipy.stats.spearmanr`` result, i.e. the
        correlation coefficient.
    """
    # Imports stay function-local, as in the original, so the block does not
    # depend on module-level imports.
    import scipy.stats as sps

    if y_train is None:
        import pandas as pd
        # Only the label column is needed; skip parsing the other columns.
        y_train = pd.read_csv(
            train_path, usecols=['is_duplicate'])['is_duplicate']
    return sps.spearmanr(feature, y_train)[0]
# import pickle
# pickle.dump(X_train,open("data_train.pkl", 'wb'), protocol=2)
#
# data_file=['test_deptree','test_glove_sim_dist','test_pca_glove',
# 'test_pca_pattern','test_w2w','test_pos','test_pca_char']
#
# path='../test/'
# for it in range(6):
# tmp=[]
# flist=[item+str(it) for item in data_file]
# test=np.empty((400000,0))
# if it==5:
# test=np.empty((345796,0))
# for f in flist:
# test=np.hstack([test,pd.read_pickle(path+f+'.pkl')])
# pickle.dump(test,open('data_test{0}.pkl'.format(it),'wb'),protocol=2)
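# Page-scrape residue, not code: "评论列表" (comment list)
# Page-scrape residue, not code: "文章目录" (article table of contents)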