def get_extra_train():
    """Load the precomputed extra training features and join them side by side.

    Seven feature tables are read from ``data/extra_feature/`` (four CSV
    files via ``pd.read_csv`` and three pickles via ``pd.read_pickle``) and
    concatenated with ``np.hstack`` in the order listed below, so the order
    of ``sources`` fixes the column order of the result.

    The shape of the combined array is printed before it is returned.

    Returns:
        The array produced by ``np.hstack`` over the loaded tables.
        Presumably 2-D with one row per training sample -- confirm against
        the feature files; ``np.hstack`` requires the tables to agree on
        their row count.
    """
    feature_dir = 'data/extra_feature/'

    # (loader, file name) pairs. Keep this order stable: downstream code
    # sees the features only by column position.
    sources = [
        (pd.read_csv, 'train_simhash_features.csv'),
        (pd.read_pickle, 'train_selftrained_w2v_sim_dist.pkl'),
        (pd.read_pickle, 'train_selftrained_glove_sim_dist.pkl'),
        (pd.read_pickle, 'train_pretrained_w2v_sim_dist.pkl'),
        (pd.read_csv, 'train_distinct_word_stats_selftrained_glove.csv'),
        (pd.read_csv, 'train_distinct_word_stats_pretrained.csv'),
        (pd.read_csv, 'train_distinct_word_stats.csv'),
    ]

    # NOTE(review): read_pickle unpickles arbitrary objects; these files
    # must come from a trusted source.
    tables = [loader(feature_dir + file_name) for loader, file_name in sources]

    X_train = np.hstack(tables)
    # Parenthesised form prints the same text on Python 2 and Python 3.
    print(X_train.shape)
    return X_train
# Comment list
# Article contents