feature_select.py 文件源码-python代码片段

feature_select.py 文件源码

python

阅读 21 收藏 0 点赞 0 评论 0

项目：movie-quality-profitability-predictor 作者: wbowditch 项目源码文件源码

def main():
    """Compare cross-fold results with and without univariate feature selection.

    Loads ``total_set.csv`` (first column used as the index), takes a fixed
    set of feature columns with ``Profitable`` as the target, and prints, in
    order:

    1. the shape of the full feature matrix,
    2. the shape after keeping the 10 best features ranked by ``f_classif``,
    3. the ``compute_cross_fold`` result for the full feature matrix,
    4. the ``compute_cross_fold`` result for the reduced feature matrix.
    """
    data_table = pd.read_csv("total_set.csv", index_col=0)

    # Columns of the CSV that are used as model inputs.
    feature_columns = [
        'Sequel',
        'Budget',
        'YouTube Trailer Views',
        'YouTube Like',
        'YouTube Dislike',
        'YouTube Like:Dislike',
        'Reddit UpVotes',
        'Distributor',
        'Reddit Ratio',
        'Reddit Comments',
        'Date',
        'Runtime',
        'MPAA',
        'Comedy',
        'Action/Adventure',
        'Animated',
        'Drama',
    ]

    data = data_table[feature_columns]
    target = data_table['Profitable']

    # Single-argument print(...) emits the same text on Python 2 and Python 3,
    # so the script no longer fails to parse on Python 3.
    print(data.shape)

    # NOTE(review): the selector is fitted on the complete dataset before
    # compute_cross_fold runs. If compute_cross_fold performs cross-validation
    # (presumably, from its name -- confirm), the "with selection" figure may
    # be optimistically biased; consider selecting inside each fold.
    data_new = SelectKBest(f_classif, k=10).fit_transform(data, target)
    print(data_new.shape)

    no_select = compute_cross_fold(data, target)
    with_select = compute_cross_fold(data_new, target)

    print(no_select)
    print(with_select)