Python train(): example source code

model.py (project: zhihu-machine-learning-challenge-2017, author: HouJP)
def fit(self,
            train_fs, train_labels,
            valid_fs, valid_labels):
        rank_k = self.config.getint('RANK', 'rank_k')

        train_DMatrix = xgb.DMatrix(train_fs, label=train_labels)
        train_DMatrix.set_group([rank_k] * (len(train_labels) // rank_k))  # integer division: one group of rank_k per query
        valid_DMatrix = xgb.DMatrix(valid_fs, label=valid_labels)
        valid_DMatrix.set_group([rank_k] * (len(valid_labels) // rank_k))

        watchlist = [(train_DMatrix, 'train'), (valid_DMatrix, 'valid')]
        # self.__lock()
        self.model = xgb.train(self.params,
                               train_DMatrix,
                               self.params['num_round'],
                               watchlist,
                               early_stopping_rounds=self.params['early_stop'],
                               verbose_eval=self.params['verbose_eval'])
        LogUtil.log('INFO', 'best_ntree_limit=%d' % self.model.best_ntree_limit)
        # self.__unlock()
        valid_preds = self.model.predict(valid_DMatrix, ntree_limit=self.model.best_ntree_limit)
        return valid_preds
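
For context, a minimal standalone sketch of the group setup this rank model relies on; the feature matrix, labels, objective, and parameter values below are illustrative assumptions, not values from the project's config.

import numpy as np
import xgboost as xgb

# Hypothetical learning-to-rank setup with fixed-size query groups of rank_k.
rank_k = 5                                  # assumed candidates per query
n_groups = 200
X = np.random.rand(rank_k * n_groups, 20)   # synthetic features
y = np.random.randint(0, 2, rank_k * n_groups)

dtrain = xgb.DMatrix(X, label=y)
dtrain.set_group([rank_k] * n_groups)       # one entry per query group

params = {'objective': 'rank:pairwise', 'eta': 0.1, 'max_depth': 6}
bst = xgb.train(params, dtrain, num_boost_round=50,
                evals=[(dtrain, 'train')], verbose_eval=10)
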
est_utils.py (project: gcforest, author: w821881341)
def xgb_train(train_config, X_train, y_train, X_test, y_test):
    import numpy as np
    import xgboost as xgb
    LOGGER.info("X_train.shape={}, y_train.shape={}, X_test.shape={}, y_test.shape={}".format(
        X_train.shape, y_train.shape, X_test.shape, y_test.shape))
    param = train_config["param"]
    xg_train = xgb.DMatrix(X_train, label=y_train)
    xg_test = xgb.DMatrix(X_test, label=y_test)
    num_round = int(train_config["num_round"])
    watchlist = [(xg_train, 'train'), (xg_test, 'test')]
    try:
        bst = xgb.train(param, xg_train, num_round, watchlist)
    except KeyboardInterrupt:
        LOGGER.info("Canceld by user's Ctrl-C action")
        return
    y_pred = np.argmax(bst.predict(xg_test), axis=1)
    acc = 100. * np.sum(y_pred == y_test) / len(y_test)
    LOGGER.info("accuracy={}%".format(acc))
main.py (project: Sberbank, author: dimaquick)
def Options():                                                                                                                                                              
    op = OptionParser()                                                                                                                                              
    op.add_option('-E', '--events')
    op.add_option('-l', '--train')
    op.add_option('-F', '--fstr')
    op.add_option('-B', '--bags')
    op.add_option('-R', '--seed')
    op.add_option('-T', '--trees')
    op.add_option('-O', '--out')
    op.add_option('-d', '--depth')
    op.add_option('-e', '--eta')
    op.add_option('-S', '--subsample')
    op.add_option('-v', '--toeval')
    op.add_option("-V", action="store_true", dest="verbose")                                                                                                              
    op.add_option("-D", action="store_true", dest="dump")                                                                                                              
    op.add_option("-Q", action="store_true", dest="names")                                                                                                              
    op.add_option("-P", action="store_true", dest="production")                                                                                                              
    return op.parse_args()[0]
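
A hedged usage sketch for Options(), assuming OptionParser is imported from optparse at module level; the simulated argument values are illustrative, and the attribute names follow optparse's default of deriving dest from the long option strings.

import sys

# Hypothetical invocation: simulate command-line arguments before parsing.
sys.argv = ['main.py', '--train', 'train.tsv', '--events', 'events.tsv',
            '--trees', '500', '--eta', '0.05', '--depth', '6', '-V']
opts = Options()
print(opts.train, opts.events, opts.trees, opts.eta, opts.depth, opts.verbose)
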
l1_3_ho_xgb_1.py (project: mlbootcamp_5, author: ivan-filonov)
def predict():
    saved = state.load('model')
    #saved = None
    if debug_mode:
        saved = None
    if saved is None:
        train, y, test, _ = data.get()
        z = pd.DataFrame()
        z['id'] = test.id
        z['y'] = 0

        v = pd.DataFrame()
        v['id'] = train.id
        v['y'] = y
        cv, _ = run(train, y, test, v, z)
        state.save('model', (v, z, cv, None))
    else:
        v, z, cv, _ = saved
    return v, z, cv, _
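
The state helper used by predict() is not shown in these snippets; below is a minimal sketch of a compatible pickle-based cache, assuming state.load returns None on a miss and state.save persists an arbitrary object under a name.

import os
import pickle

# Hypothetical stand-in for the state module (e.g. saved as state.py).
_STATE_DIR = 'state_cache'

def load(name):
    path = os.path.join(_STATE_DIR, name + '.pkl')
    if not os.path.exists(path):
        return None                      # cache miss, as predict() expects
    with open(path, 'rb') as f:
        return pickle.load(f)

def save(name, obj):
    os.makedirs(_STATE_DIR, exist_ok=True)
    with open(os.path.join(_STATE_DIR, name + '.pkl'), 'wb') as f:
        pickle.dump(obj, f)
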
l1_3_ho_xgb_2.py (project: mlbootcamp_5, author: ivan-filonov)
def predict():
    saved = state.load('model')
    #saved = None
    if debug_mode:
        saved = None
    if saved is None:
        train, y, test, _ = data.get()
        z = pd.DataFrame()
        z['id'] = test.id
        z['y'] = 0

        v = pd.DataFrame()
        v['id'] = train.id
        v['y'] = y
        cv, _ = run(train, y, test, v, z)
        state.save('model', (v, z, cv, None))
    else:
        v, z, cv, _ = saved
    return v, z, cv, _
l1_1_ho_xgb_2.py (project: mlbootcamp_5, author: ivan-filonov)
def predict():
    saved = state.load('model')
    #saved = None
    if saved is None:
        train, y, test, _ = data.get()
        z = pd.DataFrame()
        z['id'] = test.id
        z['y'] = 0

        v = pd.DataFrame()
        v['id'] = train.id
        v['y'] = y
        cv, _ = run(train, y, test, v, z)
        state.save('model', (v, z, cv, None))
    else:
        v, z, cv, _ = saved
    return v, z, cv, _
l1_4_xgb_2.py (project: mlbootcamp_5, author: ivan-filonov)
def predict():
    saved = state.load('model')
    #saved = None
    if debug_mode:
        saved = None
    if saved is None:
        train, y, test, _ = data.get()
        ftrain, ftest, _ = fea_1.get()
        ftrain2, ftest2, _ = fea_2.get()
        train = pd.concat([train, ftrain, ftrain2], axis=1)
        test = pd.concat([test, ftest, ftest2], axis=1)
        print(train.shape, test.shape)

        z = pd.DataFrame()
        z['id'] = test.id
        z['y'] = 0

        v = pd.DataFrame()
        v['id'] = train.id
        v['y'] = y
        cv, _ = run(train, y, test, v, z)
        state.save('model', (v, z, cv, None))
    else:
        v, z, cv, _ = saved
    return v, z, cv, _
l1_1_ho_xgb_1.py (project: mlbootcamp_5, author: ivan-filonov)
def predict():
    saved = state.load('model')
    #saved = None
    if saved is None:
        train, y, test, _ = data.get()
        z = pd.DataFrame()
        z['id'] = test.id
        z['y'] = 0

        v = pd.DataFrame()
        v['id'] = train.id
        v['y'] = y
        cv, _ = run(train, y, test, v, z)
        state.save('model', (v, z, cv, None))
    else:
        v, z, cv, _ = saved
    return v, z, cv, _
l1_4_xgb_1.py (project: mlbootcamp_5, author: ivan-filonov)
def predict():
    saved = state.load('model')
    #saved = None
    if debug_mode:
        saved = None
    if saved is None:
        train, y, test, _ = data.get()
        ftrain, ftest, _ = fea_1.get()
        ftrain2, ftest2, _ = fea_2.get()
        train = pd.concat([train, ftrain, ftrain2], axis=1)
        test = pd.concat([test, ftest, ftest2], axis=1)
        print(train.shape, test.shape)

        z = pd.DataFrame()
        z['id'] = test.id
        z['y'] = 0

        v = pd.DataFrame()
        v['id'] = train.id
        v['y'] = y
        cv, _ = run(train, y, test, v, z)
        state.save('model', (v, z, cv, None))
    else:
        v, z, cv, _ = saved
    return v, z, cv, _
predict_2017_06_28_1.py (project: mlbootcamp_5, author: ivan-filonov)
def xgb_base(train2, y, test2, v, z, xgb_params, N_splits, N_seeds, cname, base_seed=42):
    v[cname], z[cname] = 0, 0
    scores = []
    skf = model_selection.StratifiedKFold(n_splits=N_splits, shuffle=True)
    dtest = xgb.DMatrix(test2)
    for s in range(N_seeds):
        xgb_params['seed'] = s + base_seed
        for n, (itrain, ival) in enumerate(skf.split(train2, y)):
            dtrain = xgb.DMatrix(train2.iloc[itrain], y[itrain])  # .iloc: positional fold indices (.ix is removed in recent pandas)
            dvalid = xgb.DMatrix(train2.iloc[ival], y[ival])
            watch = [(dtrain, 'train'), (dvalid, 'valid')]
            clf = xgb.train(xgb_params, dtrain, 10000, watch, early_stopping_rounds=100, verbose_eval=False)

            p = clf.predict(dvalid)
            v.loc[ival, cname] += pconvert(p)
            score = metrics.log_loss(y[ival], p)
            z[cname]  += pconvert(clf.predict(dtest))
            print(cname, 'seed %d step %d of %d: '%(xgb_params['seed'], n+1, skf.n_splits), score, now())
            scores.append(score)

    z[cname] /= N_splits * N_seeds
    v[cname] /= N_seeds
    print('validation loss: ', metrics.log_loss(y, prestore(v[cname])))
    cv=np.array(scores)
    print(cv, cv.mean(), cv.std())
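
pconvert, prestore, and now() are defined elsewhere in this project; one plausible transform pair, assuming the fold predictions are averaged in logit space and mapped back to probabilities for scoring, might look like this sketch.

import numpy as np

# Hypothetical implementations; an assumption about what pconvert/prestore could do.
def pconvert(p, eps=1e-7):
    p = np.clip(p, eps, 1 - eps)
    return np.log(p / (1 - p))            # probability -> logit

def prestore(x):
    return 1.0 / (1.0 + np.exp(-x))       # logit -> probability
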
predict_2017_07_01_4.py (project: mlbootcamp_5, author: ivan-filonov)
def xgb_base(train2, y, test2, v, z, xgb_params, N_splits, N_seeds, cname, base_seed=42):
    v[cname], z[cname] = 0, 0
    scores = []
    dtest = xgb.DMatrix(test2)
    for s in range(N_seeds):
        xgb_params['seed'] = s + base_seed
        skf = model_selection.StratifiedKFold(n_splits=N_splits, shuffle=True, random_state=s + base_seed)
        for n, (itrain, ival) in enumerate(skf.split(train2, y)):
            dtrain = xgb.DMatrix(train2.iloc[itrain], y[itrain])
            dvalid = xgb.DMatrix(train2.iloc[ival], y[ival])
            watch = [(dtrain, 'train'), (dvalid, 'valid')]
            clf = xgb.train(xgb_params, dtrain, 10000, watch, early_stopping_rounds=100, verbose_eval=False)

            p = clf.predict(dvalid)
            v.loc[ival, cname] += pconvert(p)
            score = metrics.log_loss(y[ival], p)
            z[cname]  += pconvert(clf.predict(dtest))
            print(cname, 'seed %d step %d of %d: '%(xgb_params['seed'], n+1, skf.n_splits), score, now())
            scores.append(score)

    z[cname] /= N_splits * N_seeds
    v[cname] /= N_seeds
    print('validation loss: ', metrics.log_loss(y, prestore(v[cname])))
    cv=np.array(scores)
    print(cv, cv.mean(), cv.std())
predict_2017_07_06_1.py (project: mlbootcamp_5, author: ivan-filonov)
def save_results(v, z):
    pred_path = '../submissions/p' + base_data_name() + '.csv'
    all_data_path = '../data/output/model' + base_data_name() + '.csv.gz'

    z.y = np.clip(z.y.values, 1e-5, 1-1e-5)
    z[['y']].to_csv(pred_path, header=False, index=False)

    v['train'] = 1
    z['train'] = 0

    q = pd.concat([v, z], axis=0)
    q.to_csv(all_data_path, index=False, compression='gzip')

    for c in z.columns:
        if c in {'id', 'train', 'y'}: continue
        z[c] = prestore(z[c])
    print(z.head(20))
    print('saved', pred_path, all_data_path)
predict_2017_07_04_4.py (project: mlbootcamp_5, author: ivan-filonov)
def save_results(v, z):
    pred_path = '../submissions/p' + base_data_name() + '.csv'
    all_data_path = '../data/output/model' + base_data_name() + '.csv.gz'

    z.y = np.clip(z.y.values, 1e-5, 1-1e-5)
    z[['y']].to_csv(pred_path, header=False, index=False)

    v['train'] = 1
    z['train'] = 0

    q = pd.concat([v, z], axis=0)
    q.to_csv(all_data_path, index=False, compression='gzip')

    for c in z.columns:
        if c in {'id', 'train', 'y'}: continue
        z[c] = prestore(z[c])
    print(z.head(20))
    print('saved', pred_path, all_data_path)
predict_2017_06_27_3.py (project: mlbootcamp_5, author: ivan-filonov)
def xgb_base(train2, y, test2, v, z, xgb_params, N_splits, N_seeds, cname, base_seed=42):
    v[cname], z[cname] = 0, 0
    scores = []
    skf = model_selection.StratifiedKFold(n_splits=N_splits, shuffle=True)
    dtest = xgb.DMatrix(test2)
    for s in range(N_seeds):
        xgb_params['seed'] = s + base_seed
        for n, (itrain, ival) in enumerate(skf.split(train2, y)):
            dtrain = xgb.DMatrix(train2.iloc[itrain], y[itrain])
            dvalid = xgb.DMatrix(train2.iloc[ival], y[ival])
            watch = [(dtrain, 'train'), (dvalid, 'valid')]
            clf = xgb.train(xgb_params, dtrain, 10000, watch, early_stopping_rounds=100, verbose_eval=False)

            p = clf.predict(dvalid)
            v.loc[ival, cname] += pconvert(p)
            score = metrics.log_loss(y[ival], p)
            z[cname]  += pconvert(clf.predict(dtest))
            print(cname, 'seed %d step %d of %d: '%(xgb_params['seed'], n+1, skf.n_splits), score, now())
            scores.append(score)

    z[cname] /= N_splits * N_seeds
    v[cname] /= N_seeds
    print('validation loss: ', metrics.log_loss(y, prestore(v[cname])))
    cv=np.array(scores)
    print(cv, cv.mean(), cv.std())
predict_2017_07_05_2.py (project: mlbootcamp_5, author: ivan-filonov)
def save_results(v, z):
    pred_path = '../submissions/p' + base_data_name() + '.csv'
    all_data_path = '../data/output/model' + base_data_name() + '.csv.gz'

    z.y = np.clip(z.y.values, 1e-5, 1-1e-5)
    z[['y']].to_csv(pred_path, header=False, index=False)

    v['train'] = 1
    z['train'] = 0

    q = pd.concat([v, z], axis=0)
    q.to_csv(all_data_path, index=False, compression='gzip')

    for c in z.columns:
        if c in {'id', 'train', 'y'}: continue
        z[c] = prestore(z[c])
    print(z.head(20))
    print('saved', pred_path, all_data_path)
predict_2017_07_05_5.py (project: mlbootcamp_5, author: ivan-filonov)
def save_results(v, z):
    pred_path = '../submissions/p' + base_data_name() + '.csv'
    all_data_path = '../data/output/model' + base_data_name() + '.csv.gz'

    z.y = np.clip(z.y.values, 1e-5, 1-1e-5)
    z[['y']].to_csv(pred_path, header=False, index=False)

    v['train'] = 1
    z['train'] = 0

    q = pd.concat([v, z], axis=0)
    q.to_csv(all_data_path, index=False, compression='gzip')

    for c in z.columns:
        if c in {'id', 'train', 'y'}: continue
        z[c] = prestore(z[c])
    print(z.head(20))
    print('saved', pred_path, all_data_path)
xgb_model.py (project: kaggle-review, author: daxiongshu)
def fit(self,X,y,Xt=None,yt=None,
        load_model=None,save_model=None,
        obj=None,feval=None,print_fscore=True,evalx=None):
        print(X.shape,y.shape)

        num_round = self.params.get('num_round',100)
        early_stopping_rounds = self.params.get('early_stopping_rounds',None)
        maximize = self.params.get('maximize',False)
        dtrain = xgb.DMatrix(X, y)
        vb = self.params.get('verbose_eval',1)
        if Xt is not None:
            dvalid = xgb.DMatrix(Xt, yt)
            watchlist = [(dtrain, 'train'), (dvalid, 'valid')]
            bst = xgb.train(self.params, dtrain, num_round, evals = watchlist,
                early_stopping_rounds=early_stopping_rounds,verbose_eval=vb,
                xgb_model=load_model,obj=obj,feval=feval,maximize=maximize)
        else:
            watchlist = [(dtrain, 'train')]
            bst = xgb.train(self.params, dtrain, num_round, evals = watchlist,
                verbose_eval=vb,xgb_model=load_model,obj=obj,feval=feval)
        self.bst = bst
        if save_model is not None:
            bst.save_model(save_model)            

        fscore = self.feature_importance()
        if print_fscore:
            print("Feature Importance:")
            for i in fscore:
                print(i) 
        if Xt is not None and evalx is not None:
            yp = self.predict(Xt)
            score = evalx(yt,yp)
            print(score)
            return score
        return 0
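
A hedged usage sketch for this fit() wrapper; the class name, constructor, parameter values, and data below are assumptions, with the params keys limited to those fit() actually reads via self.params.get(...).

import numpy as np
from sklearn.metrics import roc_auc_score

# Hypothetical params dict and wrapper class name (neither is shown in the snippet).
params = {
    'objective': 'binary:logistic', 'eta': 0.1, 'max_depth': 6,
    'eval_metric': 'auc',
    'num_round': 200, 'early_stopping_rounds': 20,
    'maximize': True, 'verbose_eval': 10,
}

X = np.random.rand(500, 10)
y = np.random.randint(0, 2, 500)
Xt = np.random.rand(200, 10)
yt = np.random.randint(0, 2, 200)

model = XgbModel(params)                  # assumed constructor that stores self.params
score = model.fit(X, y, Xt, yt, evalx=roc_auc_score)
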
two_sigma_financial_modelling.py (project: PortfolioTimeSeriesAnalysis, author: MizioAnd)
def save_dataframe(self, df):
        with pd.HDFStore(''.join([TwoSigmaFinModTools._save_path, 'train_debug', self.timestamp, '.h5']), "w") as train:
            train.put("train_debug", df)
two_sigma_financial_modelling.py (project: PortfolioTimeSeriesAnalysis, author: MizioAnd)
def load_dataframe():
        dataframe_name = 'train_debug'

        # one-hot encoded
        # not one-hot
        # date_time = '20170613_19h09m40s'
        # date_time = '20170613_19h34m31s'
        # date_time = '20170614_00h07m32s'
        date_time = '20170619_11h47m22s'
        with pd.HDFStore(''.join([TwoSigmaFinModTools._save_path, dataframe_name, date_time, '.h5']), 'r') as train:
            return train.get(dataframe_name)
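
For reference, a self-contained sketch of the HDFStore round trip these two helpers perform; the file name and data are assumptions, while the key 'train_debug' matches the helpers above.

import pandas as pd

# Minimal HDF5 round trip mirroring save_dataframe/load_dataframe.
df = pd.DataFrame({'id': [1, 2, 3], 'y': [0.1, 0.5, 0.9]})

with pd.HDFStore('train_debug_example.h5', 'w') as store:
    store.put('train_debug', df)          # same key the helpers use

with pd.HDFStore('train_debug_example.h5', 'r') as store:
    restored = store.get('train_debug')

print(restored.equals(df))                # True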

