utils.py 文件源码

python
阅读 25 收藏 0 点赞 0 评论 0

项目:kaggle-quora-solution-8th 作者: qqgeogor 项目源码 文件源码
def make_mf_regression(X ,y, clf, X_test, n_folds=5,seed=1024,nb_epoch=50,max_features=0.75,name='xgb',path=''):
    n = X.shape[0]
    '''
    Fit metafeature by @clf and get prediction for test. Assumed that @clf -- classifier
    '''
    print clf
    for epoch in range(nb_epoch):
        print "Start epoch:",epoch
        mf_tr = np.zeros(X.shape[0])
        mf_te = np.zeros(X_test.shape[0])
        skf = StratifiedKFold(n_splits=n_folds, shuffle=True, random_state=seed).split(X,y)


        for ind_tr, ind_te in skf:
            X_tr = X[ind_tr]
            X_te = X[ind_te]

            y_tr = y[ind_tr]
            y_te = y[ind_te]
            clf.fit(X_tr, y_tr)
            mf_tr[ind_te] += clf.predict(X_te)
            del X_tr
            del X_te

            l = 600000
            y_pred = []
            for batch in range(4):
                X_tmp = X_test[l*batch:l*(batch+1)]
                y_pred.append(clf.predict(X_tmp))
            y_pred = np.concatenate(y_pred)
            mf_te += y_pred
            score = log_loss(y_te, mf_tr[ind_te])
            print '\tpred[{}] score:{}'.format(epoch, score)
        mf_te/=n_folds
        pd.to_pickle(mf_tr,path+'X_mf_%s_%s_random.pkl'%(name,epoch))
        pd.to_pickle(mf_te,path+'X_t_mf_%s_%s_random.pkl'%(name,epoch))
评论列表
文章目录


问题


面经


文章

微信
公众号

扫码关注公众号