python类ShuffleSplit()的实例源码

predict_2017_06_19_1.py 文件源码 项目:mlbootcamp_5 作者: ivan-filonov 项目源码 文件源码 阅读 25 收藏 0 点赞 0 评论 0
def rf1(train2, y, test2, v, z):
    """Accumulate bagged RandomForest class-1 probabilities into ``v`` and ``z``.

    Runs ``num_seeds`` x ``num_splits`` ShuffleSplit rounds.  Each round fits
    a forest on the train part, adds the ``pconvert``-ed validation
    predictions to ``v`` and the ``pconvert``-ed test predictions to ``z``.
    The column written is named after this function.

    Args:
        train2: training features, indexed with integer index arrays
            (presumably a NumPy array -- confirm against caller).
        y: training labels, indexed the same way as ``train2``.
        test2: test features passed to ``predict_proba``.
        v: frame receiving out-of-fold predictions; mutated in place.
            Assumes its index labels equal the row positions of ``train2``
            -- TODO confirm.
        z: frame receiving averaged test predictions; mutated in place.

    Returns:
        None.  Per-round log-loss values and their mean/std are printed.
    """
    cname = sys._getframe().f_code.co_name
    # Float zeros: the columns accumulate probabilities, and writing floats
    # into an integer column through ``.loc`` is deprecated in recent pandas.
    v[cname], z[cname] = 0.0, 0.0
    # How many times each training row landed in a validation split.
    hits = v[cname].copy()
    scores = []
    num_seeds = 3
    num_splits = 5
    base_seed = 13
    for seed in range(base_seed, base_seed + num_seeds):
        ss = model_selection.ShuffleSplit(n_splits=num_splits, random_state=seed)
        for n, (itrain, ival) in enumerate(ss.split(train2, y)):
            reg = ensemble.RandomForestClassifier(
                max_depth=9,
                random_state=seed,
                n_estimators=500,
                n_jobs=-2,
            )
            reg.fit(train2[itrain], y[itrain])
            p = reg.predict_proba(train2[ival])[:, 1]
            v.loc[ival, cname] += pconvert(p)
            hits.loc[ival] += 1
            score = metrics.log_loss(y[ival], p)
            print(cname, 'seed %d step %d: ' % (seed, n + 1), score, now())
            scores.append(score)
            z[cname] += pconvert(reg.predict_proba(test2)[:, 1])

    cv = np.array(scores)
    print(cv, cv.mean(), cv.std())
    z[cname] /= num_splits * num_seeds
    # ShuffleSplit validation sets overlap and need not cover every row, so
    # average each row over the rounds that actually scored it (dividing by
    # num_seeds is only right for a KFold-style partition).  Rows that were
    # never validated keep 0.
    v[cname] /= hits.clip(lower=1)
predict_2017_06_19_1.py 文件源码 项目:mlbootcamp_5 作者: ivan-filonov 项目源码 文件源码 阅读 22 收藏 0 点赞 0 评论 0
def et1(train2, y, test2, v, z):
    """Accumulate bagged ExtraTrees class-1 probabilities into ``v`` and ``z``.

    Runs ``num_seeds`` x ``num_splits`` ShuffleSplit rounds.  Each round fits
    an ``ExtraTreesClassifier`` on the train part, adds the ``pconvert``-ed
    validation predictions to ``v`` and the ``pconvert``-ed test predictions
    to ``z``.  The column written is named after this function.

    Args:
        train2: training features, indexed with integer index arrays
            (presumably a NumPy array -- confirm against caller).
        y: training labels, indexed the same way as ``train2``.
        test2: test features passed to ``predict_proba``.
        v: frame receiving out-of-fold predictions; mutated in place.
            Assumes its index labels equal the row positions of ``train2``
            -- TODO confirm.
        z: frame receiving averaged test predictions; mutated in place.

    Returns:
        None.  Per-round log-loss values and their mean/std are printed.
    """
    cname = sys._getframe().f_code.co_name
    # Float zeros: the columns accumulate probabilities, and writing floats
    # into an integer column through ``.loc`` is deprecated in recent pandas.
    v[cname], z[cname] = 0.0, 0.0
    # How many times each training row landed in a validation split.
    hits = v[cname].copy()
    scores = []
    num_seeds = 3
    num_splits = 5
    base_seed = 13
    for seed in range(base_seed, base_seed + num_seeds):
        ss = model_selection.ShuffleSplit(n_splits=num_splits, random_state=seed)
        for n, (itrain, ival) in enumerate(ss.split(train2, y)):
            reg = ensemble.ExtraTreesClassifier(
                max_depth=15,
                random_state=seed,
                n_estimators=2500,
                n_jobs=-2,
            )
            reg.fit(train2[itrain], y[itrain])
            p = reg.predict_proba(train2[ival])[:, 1]
            v.loc[ival, cname] += pconvert(p)
            hits.loc[ival] += 1
            score = metrics.log_loss(y[ival], p)
            print(cname, 'seed %d step %d: ' % (seed, n + 1), score, now())
            scores.append(score)
            z[cname] += pconvert(reg.predict_proba(test2)[:, 1])

    cv = np.array(scores)
    print(cv, cv.mean(), cv.std())
    z[cname] /= num_splits * num_seeds
    # ShuffleSplit validation sets overlap and need not cover every row, so
    # average each row over the rounds that actually scored it (dividing by
    # num_seeds is only right for a KFold-style partition).  Rows that were
    # never validated keep 0.
    v[cname] /= hits.clip(lower=1)
predict_2017_06_18_3.py 文件源码 项目:mlbootcamp_5 作者: ivan-filonov 项目源码 文件源码 阅读 41 收藏 0 点赞 0 评论 0
def rf1(train2, y, test2, v, z):
    """Accumulate bagged RandomForest class-1 probabilities into ``v`` and ``z``.

    Runs ``num_seeds`` x ``num_splits`` ShuffleSplit rounds.  Each round fits
    a forest on the train part, adds the raw validation probabilities to
    ``v`` and the raw test probabilities to ``z``.  The column written is
    named after this function.

    Args:
        train2: training features, indexed with integer index arrays
            (presumably a NumPy array -- confirm against caller).
        y: training labels, indexed the same way as ``train2``.
        test2: test features passed to ``predict_proba``.
        v: frame receiving out-of-fold predictions; mutated in place.
            Assumes its index labels equal the row positions of ``train2``
            -- TODO confirm.
        z: frame receiving averaged test predictions; mutated in place.

    Returns:
        None.  Per-round log-loss values and their mean/std are printed.
    """
    cname = sys._getframe().f_code.co_name
    # Float zeros: the columns accumulate probabilities, and writing floats
    # into an integer column through ``.loc`` is deprecated in recent pandas.
    v[cname], z[cname] = 0.0, 0.0
    # How many times each training row landed in a validation split.
    hits = v[cname].copy()
    scores = []
    num_seeds = 3
    num_splits = 5
    base_seed = 13
    for seed in range(base_seed, base_seed + num_seeds):
        ss = model_selection.ShuffleSplit(n_splits=num_splits, random_state=seed)
        for n, (itrain, ival) in enumerate(ss.split(train2, y)):
            reg = ensemble.RandomForestClassifier(
                max_depth=9,
                random_state=seed,
                n_estimators=500,
                n_jobs=-2,
            )
            reg.fit(train2[itrain], y[itrain])
            p = reg.predict_proba(train2[ival])[:, 1]
            v.loc[ival, cname] += p
            hits.loc[ival] += 1
            score = metrics.log_loss(y[ival], p)
            print(cname, 'seed %d step %d: ' % (seed, n + 1), score, now())
            scores.append(score)
            z[cname] += reg.predict_proba(test2)[:, 1]

    cv = np.array(scores)
    print(cv, cv.mean(), cv.std())
    z[cname] /= num_splits * num_seeds
    # ShuffleSplit validation sets overlap and need not cover every row, so
    # average each row over the rounds that actually scored it (dividing by
    # num_seeds is only right for a KFold-style partition).  Rows that were
    # never validated keep 0.
    v[cname] /= hits.clip(lower=1)
predict_2017_06_18_3.py 文件源码 项目:mlbootcamp_5 作者: ivan-filonov 项目源码 文件源码 阅读 30 收藏 0 点赞 0 评论 0
def et1(train2, y, test2, v, z):
    """Accumulate bagged ExtraTrees class-1 probabilities into ``v`` and ``z``.

    Runs ``num_seeds`` x ``num_splits`` ShuffleSplit rounds.  Each round fits
    an ``ExtraTreesClassifier`` on the train part, adds the ``pconvert``-ed
    validation predictions to ``v`` and the ``pconvert``-ed test predictions
    to ``z``.  The column written is named after this function.

    Args:
        train2: training features, indexed with integer index arrays
            (presumably a NumPy array -- confirm against caller).
        y: training labels, indexed the same way as ``train2``.
        test2: test features passed to ``predict_proba``.
        v: frame receiving out-of-fold predictions; mutated in place.
            Assumes its index labels equal the row positions of ``train2``
            -- TODO confirm.
        z: frame receiving averaged test predictions; mutated in place.

    Returns:
        None.  Per-round log-loss values and their mean/std are printed.
    """
    cname = sys._getframe().f_code.co_name
    # Float zeros: the columns accumulate probabilities, and writing floats
    # into an integer column through ``.loc`` is deprecated in recent pandas.
    v[cname], z[cname] = 0.0, 0.0
    # How many times each training row landed in a validation split.
    hits = v[cname].copy()
    scores = []
    num_seeds = 3
    num_splits = 5
    base_seed = 13
    for seed in range(base_seed, base_seed + num_seeds):
        ss = model_selection.ShuffleSplit(n_splits=num_splits, random_state=seed)
        for n, (itrain, ival) in enumerate(ss.split(train2, y)):
            reg = ensemble.ExtraTreesClassifier(
                max_depth=15,
                random_state=seed,
                n_estimators=2500,
                n_jobs=-2,
            )
            reg.fit(train2[itrain], y[itrain])
            p = reg.predict_proba(train2[ival])[:, 1]
            v.loc[ival, cname] += pconvert(p)
            hits.loc[ival] += 1
            score = metrics.log_loss(y[ival], p)
            print(cname, 'seed %d step %d: ' % (seed, n + 1), score, now())
            scores.append(score)
            z[cname] += pconvert(reg.predict_proba(test2)[:, 1])

    cv = np.array(scores)
    print(cv, cv.mean(), cv.std())
    z[cname] /= num_splits * num_seeds
    # ShuffleSplit validation sets overlap and need not cover every row, so
    # average each row over the rounds that actually scored it (dividing by
    # num_seeds is only right for a KFold-style partition).  Rows that were
    # never validated keep 0.
    v[cname] /= hits.clip(lower=1)
predict_2017_06_17_5.py 文件源码 项目:mlbootcamp_5 作者: ivan-filonov 项目源码 文件源码 阅读 19 收藏 0 点赞 0 评论 0
def rf1(train2, y, test2, v, z):
    """Accumulate bagged RandomForest class-1 probabilities into ``v`` and ``z``.

    Runs ``num_seeds`` x ``num_splits`` ShuffleSplit rounds.  Each round fits
    a forest on the train part, adds the raw validation probabilities to
    ``v`` and ``log1p`` of the test probabilities to ``z``.  The column
    written is named after this function.

    Args:
        train2: training features, indexed with integer index arrays
            (presumably a NumPy array -- confirm against caller).
        y: training labels, indexed the same way as ``train2``.
        test2: test features passed to ``predict_proba``.
        v: frame receiving out-of-fold predictions; mutated in place.
            Assumes its index labels equal the row positions of ``train2``
            -- TODO confirm.
        z: frame receiving averaged test predictions; mutated in place.

    Returns:
        None.  Per-round log-loss values and their mean/std are printed.
    """
    cname = sys._getframe().f_code.co_name
    # Float zeros: the columns accumulate probabilities, and writing floats
    # into an integer column through ``.loc`` is deprecated in recent pandas.
    v[cname], z[cname] = 0.0, 0.0
    # How many times each training row landed in a validation split.
    hits = v[cname].copy()
    scores = []
    num_seeds = 3
    num_splits = 5
    base_seed = 13
    for seed in range(base_seed, base_seed + num_seeds):
        ss = model_selection.ShuffleSplit(n_splits=num_splits, random_state=seed)
        for n, (itrain, ival) in enumerate(ss.split(train2, y)):
            reg = ensemble.RandomForestClassifier(
                max_depth=9,
                random_state=seed,
                n_estimators=500,
                n_jobs=-2,
            )
            reg.fit(train2[itrain], y[itrain])
            p = reg.predict_proba(train2[ival])[:, 1]
            v.loc[ival, cname] += p
            hits.loc[ival] += 1
            score = metrics.log_loss(y[ival], p)
            print(cname, 'seed %d step %d: ' % (seed, n + 1), score, now())
            scores.append(score)
            # NOTE(review): z is accumulated in log1p space while v holds raw
            # probabilities, so the two columns are on different scales --
            # confirm which transform is intended.
            z[cname] += np.log1p(reg.predict_proba(test2)[:, 1])

    cv = np.array(scores)
    print(cv, cv.mean(), cv.std())
    z[cname] /= num_splits * num_seeds
    # ShuffleSplit validation sets overlap and need not cover every row, so
    # average each row over the rounds that actually scored it (dividing by
    # num_seeds is only right for a KFold-style partition).  Rows that were
    # never validated keep 0.
    v[cname] /= hits.clip(lower=1)
predict_2017_07_05_3.py 文件源码 项目:mlbootcamp_5 作者: ivan-filonov 项目源码 文件源码 阅读 24 收藏 0 点赞 0 评论 0
def keras_common(train3, y, test3, v, z, num_splits, cname, build_model, seed = 1234):
    """Train a Keras model on ShuffleSplit rounds and fill ``v`` / ``z``.

    For each of ``num_splits`` random splits a fresh model from
    ``build_model()`` is fitted, the weights stored at ``model_path`` are
    loaded back, and the ``pconvert``-ed validation and test predictions are
    accumulated into column ``cname`` of ``v`` and ``z``.

    Args:
        train3: training features, indexed with integer index arrays.
        y: training labels, indexed the same way as ``train3``.
        test3: test features passed to ``model.predict``.
        v: frame receiving out-of-fold predictions; mutated in place.
            Assumes its index labels equal the row positions of ``train3``
            -- TODO confirm.
        z: frame receiving averaged test predictions; mutated in place.
        num_splits: number of splits; also sets the validation share
            (``1 / num_splits``).
        cname: name of the output column and of the temporary weights file.
        build_model: zero-argument callable returning a new Keras model.
        seed: value passed to ``np.random.seed``.

    Returns:
        None.  Per-fold log-loss values and their mean/std are printed.
    """
    # Float zeros: the columns accumulate probabilities, and writing floats
    # into an integer column through ``.loc`` is deprecated in recent pandas.
    v[cname], z[cname] = 0.0, 0.0
    # How many times each training row landed in a validation split.
    hits = v[cname].copy()
    np.random.seed(seed)
    build_model().summary(line_length=120)
    model_path = '../data/working/' + cname + '_keras_model.h5'
    ss = model_selection.ShuffleSplit(n_splits=num_splits, random_state=11, test_size=1/num_splits)
    scores = []
    for n, (itrain, ival) in enumerate(ss.split(train3, y)):
        xtrain, xval = train3[itrain], train3[ival]
        ytrain, yval = y[itrain], y[ival]
        model = build_model()
        model.fit(
            xtrain, ytrain,
            batch_size=128,
            epochs=10000,
            validation_data=(xval, yval),
            verbose=0,
            # presumably these callbacks stop training early and checkpoint
            # to model_path -- verify in build_keras_fit_callbacks
            callbacks=build_keras_fit_callbacks(model_path),
            shuffle=True,
        )
        model.load_weights(model_path)
        p = model.predict(xval)
        v.loc[ival, cname] += pconvert(p).ravel()
        hits.loc[ival] += 1
        score = metrics.log_loss(y[ival], p)
        print(cname, 'fold %d: ' % (n + 1), score, now())
        scores.append(score)
        z[cname] += pconvert(model.predict(test3)).ravel()
        del model
        for i in range(3):
            gc.collect(i)
    os.remove(model_path)

    cv = np.array(scores)
    print(cv, cv.mean(), cv.std())
    z[cname] /= num_splits
    # ShuffleSplit validation sets can overlap, so a row may have been
    # scored several times: average over its actual number of appearances.
    # Rows that were never validated keep 0.
    v[cname] /= hits.clip(lower=1)
predict_2017_07_06_5.py 文件源码 项目:mlbootcamp_5 作者: ivan-filonov 项目源码 文件源码 阅读 24 收藏 0 点赞 0 评论 0
def keras_common(train3, y, test3, v, z, num_splits, cname, build_model, seed = 1234, batch_size = 128):
    """Train a Keras model on ShuffleSplit rounds and fill ``v`` / ``z``.

    For each of ``num_splits`` random splits a fresh model from
    ``build_model()`` is fitted, the weights stored at ``model_path`` are
    loaded back, and the ``pconvert``-ed validation and test predictions are
    accumulated into column ``cname`` of ``v`` and ``z``.

    Args:
        train3: training features, indexed with integer index arrays.
        y: training labels, indexed the same way as ``train3``.
        test3: test features passed to ``model.predict``.
        v: frame receiving out-of-fold predictions; mutated in place.
            Assumes its index labels equal the row positions of ``train3``
            -- TODO confirm.
        z: frame receiving averaged test predictions; mutated in place.
        num_splits: number of splits; also sets the validation share
            (``1 / num_splits``).
        cname: name of the output column and of the temporary weights file.
        build_model: zero-argument callable returning a new Keras model.
        seed: value passed to ``np.random.seed``.
        batch_size: batch size forwarded to ``model.fit``.

    Returns:
        None.  Per-fold log-loss values and their mean/std are printed.
    """
    # Float zeros: the columns accumulate probabilities, and writing floats
    # into an integer column through ``.loc`` is deprecated in recent pandas.
    v[cname], z[cname] = 0.0, 0.0
    # How many times each training row landed in a validation split.
    hits = v[cname].copy()
    np.random.seed(seed)
    build_model().summary(line_length=120)
    model_path = '../data/working/' + cname + '_keras_model.h5'
    ss = model_selection.ShuffleSplit(n_splits=num_splits, random_state=11, test_size=1/num_splits)
    scores = []
    for n, (itrain, ival) in enumerate(ss.split(train3, y)):
        xtrain, xval = train3[itrain], train3[ival]
        ytrain, yval = y[itrain], y[ival]
        model = build_model()
        model.fit(
            xtrain, ytrain,
            batch_size=batch_size,
            epochs=10000,
            validation_data=(xval, yval),
            verbose=0,
            # presumably these callbacks stop training early and checkpoint
            # to model_path -- verify in build_keras_fit_callbacks
            callbacks=build_keras_fit_callbacks(model_path),
            shuffle=True,
        )
        model.load_weights(model_path)
        p = model.predict(xval)
        v.loc[ival, cname] += pconvert(p).ravel()
        hits.loc[ival] += 1
        score = metrics.log_loss(y[ival], p)
        print(cname, 'fold %d: ' % (n + 1), score, now())
        scores.append(score)
        z[cname] += pconvert(model.predict(test3)).ravel()
        del model
        for i in range(3):
            gc.collect(i)
    os.remove(model_path)

    cv = np.array(scores)
    print(cv, cv.mean(), cv.std())
    z[cname] /= num_splits
    # ShuffleSplit validation sets can overlap, so a row may have been
    # scored several times: average over its actual number of appearances.
    # Rows that were never validated keep 0.
    v[cname] /= hits.clip(lower=1)
predict_2017_06_20_1.py 文件源码 项目:mlbootcamp_5 作者: ivan-filonov 项目源码 文件源码 阅读 30 收藏 0 点赞 0 评论 0
def rf1(train2, y, test2, v, z):
    """Accumulate bagged RandomForest class-1 probabilities into ``v`` and ``z``.

    Runs ``num_seeds`` x ``num_splits`` ShuffleSplit rounds.  Each round fits
    a forest on the train part, adds the ``pconvert``-ed validation
    predictions to ``v`` and the ``pconvert``-ed test predictions to ``z``.
    The column written is named after this function.

    Args:
        train2: training features, indexed with integer index arrays
            (presumably a NumPy array -- confirm against caller).
        y: training labels, indexed the same way as ``train2``.
        test2: test features passed to ``predict_proba``.
        v: frame receiving out-of-fold predictions; mutated in place.
            Assumes its index labels equal the row positions of ``train2``
            -- TODO confirm.
        z: frame receiving averaged test predictions; mutated in place.

    Returns:
        None.  Per-round log-loss values and their mean/std are printed.
    """
    cname = sys._getframe().f_code.co_name
    # Float zeros: the columns accumulate probabilities, and writing floats
    # into an integer column through ``.loc`` is deprecated in recent pandas.
    v[cname], z[cname] = 0.0, 0.0
    # How many times each training row landed in a validation split.
    hits = v[cname].copy()
    scores = []
    num_seeds = 7
    num_splits = 17
    base_seed = 13
    for seed in range(base_seed, base_seed + num_seeds):
        ss = model_selection.ShuffleSplit(n_splits=num_splits, random_state=seed)
        for n, (itrain, ival) in enumerate(ss.split(train2, y)):
            reg = ensemble.RandomForestClassifier(
                max_depth=9,
                random_state=seed,
                n_estimators=500,
                n_jobs=-2,
            )
            reg.fit(train2[itrain], y[itrain])
            p = reg.predict_proba(train2[ival])[:, 1]
            v.loc[ival, cname] += pconvert(p)
            hits.loc[ival] += 1
            score = metrics.log_loss(y[ival], p)
            print(cname, 'seed %d step %d: ' % (seed, n + 1), score, now())
            scores.append(score)
            z[cname] += pconvert(reg.predict_proba(test2)[:, 1])

    cv = np.array(scores)
    print(cv, cv.mean(), cv.std())
    z[cname] /= num_splits * num_seeds
    # ShuffleSplit validation sets overlap and need not cover every row, so
    # average each row over the rounds that actually scored it (dividing by
    # num_seeds is only right for a KFold-style partition).  Rows that were
    # never validated keep 0.
    v[cname] /= hits.clip(lower=1)
predict_2017_06_20_1.py 文件源码 项目:mlbootcamp_5 作者: ivan-filonov 项目源码 文件源码 阅读 19 收藏 0 点赞 0 评论 0
def et1(train2, y, test2, v, z):
    """Accumulate bagged ExtraTrees class-1 probabilities into ``v`` and ``z``.

    Runs ``num_seeds`` x ``num_splits`` ShuffleSplit rounds.  Each round fits
    an ``ExtraTreesClassifier`` on the train part, adds the ``pconvert``-ed
    validation predictions to ``v`` and the ``pconvert``-ed test predictions
    to ``z``.  The column written is named after this function.

    Args:
        train2: training features, indexed with integer index arrays
            (presumably a NumPy array -- confirm against caller).
        y: training labels, indexed the same way as ``train2``.
        test2: test features passed to ``predict_proba``.
        v: frame receiving out-of-fold predictions; mutated in place.
            Assumes its index labels equal the row positions of ``train2``
            -- TODO confirm.
        z: frame receiving averaged test predictions; mutated in place.

    Returns:
        None.  Per-round log-loss values and their mean/std are printed.
    """
    cname = sys._getframe().f_code.co_name
    # Float zeros: the columns accumulate probabilities, and writing floats
    # into an integer column through ``.loc`` is deprecated in recent pandas.
    v[cname], z[cname] = 0.0, 0.0
    # How many times each training row landed in a validation split.
    hits = v[cname].copy()
    scores = []
    num_seeds = 7
    num_splits = 17
    base_seed = 13
    for seed in range(base_seed, base_seed + num_seeds):
        ss = model_selection.ShuffleSplit(n_splits=num_splits, random_state=seed)
        for n, (itrain, ival) in enumerate(ss.split(train2, y)):
            reg = ensemble.ExtraTreesClassifier(
                max_depth=7,
                random_state=seed,
                n_estimators=1500,
                n_jobs=-2,
            )
            reg.fit(train2[itrain], y[itrain])
            p = reg.predict_proba(train2[ival])[:, 1]
            v.loc[ival, cname] += pconvert(p)
            hits.loc[ival] += 1
            score = metrics.log_loss(y[ival], p)
            print(cname, 'seed %d step %d: ' % (seed, n + 1), score, now())
            scores.append(score)
            z[cname] += pconvert(reg.predict_proba(test2)[:, 1])

    cv = np.array(scores)
    print(cv, cv.mean(), cv.std())
    z[cname] /= num_splits * num_seeds
    # ShuffleSplit validation sets overlap and need not cover every row, so
    # average each row over the rounds that actually scored it (dividing by
    # num_seeds is only right for a KFold-style partition).  Rows that were
    # never validated keep 0.
    v[cname] /= hits.clip(lower=1)
predict_2017_07_05_4.py 文件源码 项目:mlbootcamp_5 作者: ivan-filonov 项目源码 文件源码 阅读 21 收藏 0 点赞 0 评论 0
def keras_common(train3, y, test3, v, z, num_splits, cname, build_model, seed = 1234, batch_size = 128):
    """Train a Keras model on ShuffleSplit rounds and fill ``v`` / ``z``.

    For each of ``num_splits`` random splits a fresh model from
    ``build_model()`` is fitted, the weights stored at ``model_path`` are
    loaded back, and the ``pconvert``-ed validation and test predictions are
    accumulated into column ``cname`` of ``v`` and ``z``.

    Args:
        train3: training features, indexed with integer index arrays.
        y: training labels, indexed the same way as ``train3``.
        test3: test features passed to ``model.predict``.
        v: frame receiving out-of-fold predictions; mutated in place.
            Assumes its index labels equal the row positions of ``train3``
            -- TODO confirm.
        z: frame receiving averaged test predictions; mutated in place.
        num_splits: number of splits; also sets the validation share
            (``1 / num_splits``).
        cname: name of the output column and of the temporary weights file.
        build_model: zero-argument callable returning a new Keras model.
        seed: value passed to ``np.random.seed``.
        batch_size: batch size forwarded to ``model.fit``.

    Returns:
        None.  Per-fold log-loss values and their mean/std are printed.
    """
    # Float zeros: the columns accumulate probabilities, and writing floats
    # into an integer column through ``.loc`` is deprecated in recent pandas.
    v[cname], z[cname] = 0.0, 0.0
    # How many times each training row landed in a validation split.
    hits = v[cname].copy()
    np.random.seed(seed)
    build_model().summary(line_length=120)
    model_path = '../data/working/' + cname + '_keras_model.h5'
    ss = model_selection.ShuffleSplit(n_splits=num_splits, random_state=11, test_size=1/num_splits)
    scores = []
    for n, (itrain, ival) in enumerate(ss.split(train3, y)):
        xtrain, xval = train3[itrain], train3[ival]
        ytrain, yval = y[itrain], y[ival]
        model = build_model()
        model.fit(
            xtrain, ytrain,
            batch_size=batch_size,
            epochs=10000,
            validation_data=(xval, yval),
            verbose=0,
            # presumably these callbacks stop training early and checkpoint
            # to model_path -- verify in build_keras_fit_callbacks
            callbacks=build_keras_fit_callbacks(model_path),
            shuffle=True,
        )
        model.load_weights(model_path)
        p = model.predict(xval)
        v.loc[ival, cname] += pconvert(p).ravel()
        hits.loc[ival] += 1
        score = metrics.log_loss(y[ival], p)
        print(cname, 'fold %d: ' % (n + 1), score, now())
        scores.append(score)
        z[cname] += pconvert(model.predict(test3)).ravel()
        del model
        for i in range(3):
            gc.collect(i)
    os.remove(model_path)

    cv = np.array(scores)
    print(cv, cv.mean(), cv.std())
    z[cname] /= num_splits
    # ShuffleSplit validation sets can overlap, so a row may have been
    # scored several times: average over its actual number of appearances.
    # Rows that were never validated keep 0.
    v[cname] /= hits.clip(lower=1)
predict_2017_06_18_1.py 文件源码 项目:mlbootcamp_5 作者: ivan-filonov 项目源码 文件源码 阅读 24 收藏 0 点赞 0 评论 0
def et1(train2, y, test2, v, z):
    """Accumulate bagged ExtraTrees class-1 probabilities into ``v`` and ``z``.

    Runs ``num_seeds`` x ``num_splits`` ShuffleSplit rounds.  Each round fits
    an ``ExtraTreesClassifier`` on the train part, adds the raw validation
    probabilities to ``v`` and the raw test probabilities to ``z``.  The
    column written is named after this function.

    Args:
        train2: training features, indexed with integer index arrays
            (presumably a NumPy array -- confirm against caller).
        y: training labels, indexed the same way as ``train2``.
        test2: test features passed to ``predict_proba``.
        v: frame receiving out-of-fold predictions; mutated in place.
            Assumes its index labels equal the row positions of ``train2``
            -- TODO confirm.
        z: frame receiving averaged test predictions; mutated in place.

    Returns:
        None.  Per-round log-loss values and their mean/std are printed.
    """
    cname = sys._getframe().f_code.co_name
    # Float zeros: the columns accumulate probabilities, and writing floats
    # into an integer column through ``.loc`` is deprecated in recent pandas.
    v[cname], z[cname] = 0.0, 0.0
    # How many times each training row landed in a validation split.
    hits = v[cname].copy()
    scores = []
    num_seeds = 3
    num_splits = 5
    base_seed = 13
    for seed in range(base_seed, base_seed + num_seeds):
        ss = model_selection.ShuffleSplit(n_splits=num_splits, random_state=seed)
        for n, (itrain, ival) in enumerate(ss.split(train2, y)):
            reg = ensemble.ExtraTreesClassifier(
                max_depth=15,
                random_state=seed,
                n_estimators=2500,
                n_jobs=-2,
            )
            reg.fit(train2[itrain], y[itrain])
            p = reg.predict_proba(train2[ival])[:, 1]
            v.loc[ival, cname] += p
            hits.loc[ival] += 1
            score = metrics.log_loss(y[ival], p)
            print(cname, 'seed %d step %d: ' % (seed, n + 1), score, now())
            scores.append(score)
            z[cname] += reg.predict_proba(test2)[:, 1]

    cv = np.array(scores)
    print(cv, cv.mean(), cv.std())
    z[cname] /= num_splits * num_seeds
    # ShuffleSplit validation sets overlap and need not cover every row, so
    # average each row over the rounds that actually scored it (dividing by
    # num_seeds is only right for a KFold-style partition).  Rows that were
    # never validated keep 0.
    v[cname] /= hits.clip(lower=1)
predict_2017_06_21_1.py 文件源码 项目:mlbootcamp_5 作者: ivan-filonov 项目源码 文件源码 阅读 34 收藏 0 点赞 0 评论 0
def rf1(train2, y, test2, v, z):
    """Accumulate bagged RandomForest class-1 probabilities into ``v`` and ``z``.

    Runs ``num_seeds`` x ``num_splits`` ShuffleSplit rounds.  Each round fits
    a forest on the train part, adds the ``pconvert``-ed validation
    predictions to ``v`` and the ``pconvert``-ed test predictions to ``z``.
    The column written is named after this function.

    Args:
        train2: training features, indexed with integer index arrays
            (presumably a NumPy array -- confirm against caller).
        y: training labels, indexed the same way as ``train2``.
        test2: test features passed to ``predict_proba``.
        v: frame receiving out-of-fold predictions; mutated in place.
            Assumes its index labels equal the row positions of ``train2``
            -- TODO confirm.
        z: frame receiving averaged test predictions; mutated in place.

    Returns:
        None.  Per-round log-loss values and their mean/std are printed.
    """
    cname = sys._getframe().f_code.co_name
    # Float zeros: the columns accumulate probabilities, and writing floats
    # into an integer column through ``.loc`` is deprecated in recent pandas.
    v[cname], z[cname] = 0.0, 0.0
    # How many times each training row landed in a validation split.
    hits = v[cname].copy()
    scores = []
    num_seeds = 3
    num_splits = 7
    base_seed = 13
    for seed in range(base_seed, base_seed + num_seeds):
        ss = model_selection.ShuffleSplit(n_splits=num_splits, random_state=seed)
        for n, (itrain, ival) in enumerate(ss.split(train2, y)):
            reg = ensemble.RandomForestClassifier(
                max_depth=9,
                random_state=seed,
                n_estimators=500,
                n_jobs=-2,
            )
            reg.fit(train2[itrain], y[itrain])
            p = reg.predict_proba(train2[ival])[:, 1]
            v.loc[ival, cname] += pconvert(p)
            hits.loc[ival] += 1
            score = metrics.log_loss(y[ival], p)
            print(cname, 'seed %d step %d: ' % (seed, n + 1), score, now())
            scores.append(score)
            z[cname] += pconvert(reg.predict_proba(test2)[:, 1])

    cv = np.array(scores)
    print(cv, cv.mean(), cv.std())
    z[cname] /= num_splits * num_seeds
    # ShuffleSplit validation sets overlap and need not cover every row, so
    # average each row over the rounds that actually scored it (dividing by
    # num_seeds is only right for a KFold-style partition).  Rows that were
    # never validated keep 0.
    v[cname] /= hits.clip(lower=1)
predict_2017_06_21_1.py 文件源码 项目:mlbootcamp_5 作者: ivan-filonov 项目源码 文件源码 阅读 18 收藏 0 点赞 0 评论 0
def et1(train2, y, test2, v, z):
    """Accumulate bagged ExtraTrees class-1 probabilities into ``v`` and ``z``.

    Runs ``num_seeds`` x ``num_splits`` ShuffleSplit rounds.  Each round fits
    an ``ExtraTreesClassifier`` on the train part, adds the ``pconvert``-ed
    validation predictions to ``v`` and the ``pconvert``-ed test predictions
    to ``z``.  The column written is named after this function.

    Args:
        train2: training features, indexed with integer index arrays
            (presumably a NumPy array -- confirm against caller).
        y: training labels, indexed the same way as ``train2``.
        test2: test features passed to ``predict_proba``.
        v: frame receiving out-of-fold predictions; mutated in place.
            Assumes its index labels equal the row positions of ``train2``
            -- TODO confirm.
        z: frame receiving averaged test predictions; mutated in place.

    Returns:
        None.  Per-round log-loss values and their mean/std are printed.
    """
    cname = sys._getframe().f_code.co_name
    # Float zeros: the columns accumulate probabilities, and writing floats
    # into an integer column through ``.loc`` is deprecated in recent pandas.
    v[cname], z[cname] = 0.0, 0.0
    # How many times each training row landed in a validation split.
    hits = v[cname].copy()
    scores = []
    num_seeds = 3
    num_splits = 7
    base_seed = 13
    for seed in range(base_seed, base_seed + num_seeds):
        ss = model_selection.ShuffleSplit(n_splits=num_splits, random_state=seed)
        for n, (itrain, ival) in enumerate(ss.split(train2, y)):
            reg = ensemble.ExtraTreesClassifier(
                max_depth=11,
                random_state=seed,
                n_estimators=1500,
                n_jobs=-2,
            )
            reg.fit(train2[itrain], y[itrain])
            p = reg.predict_proba(train2[ival])[:, 1]
            v.loc[ival, cname] += pconvert(p)
            hits.loc[ival] += 1
            score = metrics.log_loss(y[ival], p)
            print(cname, 'seed %d step %d: ' % (seed, n + 1), score, now())
            scores.append(score)
            z[cname] += pconvert(reg.predict_proba(test2)[:, 1])

    cv = np.array(scores)
    print(cv, cv.mean(), cv.std())
    z[cname] /= num_splits * num_seeds
    # ShuffleSplit validation sets overlap and need not cover every row, so
    # average each row over the rounds that actually scored it (dividing by
    # num_seeds is only right for a KFold-style partition).  Rows that were
    # never validated keep 0.
    v[cname] /= hits.clip(lower=1)
predict_2017_07_05_1.py 文件源码 项目:mlbootcamp_5 作者: ivan-filonov 项目源码 文件源码 阅读 23 收藏 0 点赞 0 评论 0
def keras_common(train3, y, test3, v, z, num_splits, cname, build_model, seed = 1234, batch_size = 128):
    """Train a Keras model on ShuffleSplit rounds and fill ``v`` / ``z``.

    For each of ``num_splits`` random splits a fresh model from
    ``build_model()`` is fitted, the weights stored at ``model_path`` are
    loaded back, and the ``pconvert``-ed validation and test predictions are
    accumulated into column ``cname`` of ``v`` and ``z``.

    Args:
        train3: training features, indexed with integer index arrays.
        y: training labels, indexed the same way as ``train3``.
        test3: test features passed to ``model.predict``.
        v: frame receiving out-of-fold predictions; mutated in place.
            Assumes its index labels equal the row positions of ``train3``
            -- TODO confirm.
        z: frame receiving averaged test predictions; mutated in place.
        num_splits: number of splits; also sets the validation share
            (``1 / num_splits``).
        cname: name of the output column and of the temporary weights file.
        build_model: zero-argument callable returning a new Keras model.
        seed: value passed to ``np.random.seed``.
        batch_size: batch size forwarded to ``model.fit``.

    Returns:
        None.  Per-fold log-loss values and their mean/std are printed.
    """
    # Float zeros: the columns accumulate probabilities, and writing floats
    # into an integer column through ``.loc`` is deprecated in recent pandas.
    v[cname], z[cname] = 0.0, 0.0
    # How many times each training row landed in a validation split.
    hits = v[cname].copy()
    np.random.seed(seed)
    build_model().summary(line_length=120)
    model_path = '../data/working/' + cname + '_keras_model.h5'
    ss = model_selection.ShuffleSplit(n_splits=num_splits, random_state=11, test_size=1/num_splits)
    scores = []
    for n, (itrain, ival) in enumerate(ss.split(train3, y)):
        xtrain, xval = train3[itrain], train3[ival]
        ytrain, yval = y[itrain], y[ival]
        model = build_model()
        model.fit(
            xtrain, ytrain,
            batch_size=batch_size,
            epochs=10000,
            validation_data=(xval, yval),
            verbose=0,
            # presumably these callbacks stop training early and checkpoint
            # to model_path -- verify in build_keras_fit_callbacks
            callbacks=build_keras_fit_callbacks(model_path),
            shuffle=True,
        )
        model.load_weights(model_path)
        p = model.predict(xval)
        v.loc[ival, cname] += pconvert(p).ravel()
        hits.loc[ival] += 1
        score = metrics.log_loss(y[ival], p)
        print(cname, 'fold %d: ' % (n + 1), score, now())
        scores.append(score)
        z[cname] += pconvert(model.predict(test3)).ravel()
        del model
        for i in range(3):
            gc.collect(i)
    os.remove(model_path)

    cv = np.array(scores)
    print(cv, cv.mean(), cv.std())
    z[cname] /= num_splits
    # ShuffleSplit validation sets can overlap, so a row may have been
    # scored several times: average over its actual number of appearances.
    # Rows that were never validated keep 0.
    v[cname] /= hits.clip(lower=1)
predict_2017_06_19_4.py 文件源码 项目:mlbootcamp_5 作者: ivan-filonov 项目源码 文件源码 阅读 17 收藏 0 点赞 0 评论 0
def rf1(train2, y, test2, v, z):
    """Accumulate bagged RandomForest class-1 probabilities into ``v`` and ``z``.

    Runs ``num_seeds`` x ``num_splits`` ShuffleSplit rounds.  Each round fits
    a forest on the train part, adds the ``pconvert``-ed validation
    predictions to ``v`` and the ``pconvert``-ed test predictions to ``z``.
    The column written is named after this function.

    Args:
        train2: training features, indexed with integer index arrays
            (presumably a NumPy array -- confirm against caller).
        y: training labels, indexed the same way as ``train2``.
        test2: test features passed to ``predict_proba``.
        v: frame receiving out-of-fold predictions; mutated in place.
            Assumes its index labels equal the row positions of ``train2``
            -- TODO confirm.
        z: frame receiving averaged test predictions; mutated in place.

    Returns:
        None.  Per-round log-loss values and their mean/std are printed.
    """
    cname = sys._getframe().f_code.co_name
    # Float zeros: the columns accumulate probabilities, and writing floats
    # into an integer column through ``.loc`` is deprecated in recent pandas.
    v[cname], z[cname] = 0.0, 0.0
    # How many times each training row landed in a validation split.
    hits = v[cname].copy()
    scores = []
    num_seeds = 7
    num_splits = 17
    base_seed = 13
    for seed in range(base_seed, base_seed + num_seeds):
        ss = model_selection.ShuffleSplit(n_splits=num_splits, random_state=seed)
        for n, (itrain, ival) in enumerate(ss.split(train2, y)):
            reg = ensemble.RandomForestClassifier(
                max_depth=9,
                random_state=seed,
                n_estimators=500,
                n_jobs=-2,
            )
            reg.fit(train2[itrain], y[itrain])
            p = reg.predict_proba(train2[ival])[:, 1]
            v.loc[ival, cname] += pconvert(p)
            hits.loc[ival] += 1
            score = metrics.log_loss(y[ival], p)
            print(cname, 'seed %d step %d: ' % (seed, n + 1), score, now())
            scores.append(score)
            z[cname] += pconvert(reg.predict_proba(test2)[:, 1])

    cv = np.array(scores)
    print(cv, cv.mean(), cv.std())
    z[cname] /= num_splits * num_seeds
    # ShuffleSplit validation sets overlap and need not cover every row, so
    # average each row over the rounds that actually scored it (dividing by
    # num_seeds is only right for a KFold-style partition).  Rows that were
    # never validated keep 0.
    v[cname] /= hits.clip(lower=1)
predict_2017_07_04_5.py 文件源码 项目:mlbootcamp_5 作者: ivan-filonov 项目源码 文件源码 阅读 29 收藏 0 点赞 0 评论 0
def keras_common(train3, y, test3, v, z, num_splits, cname, build_model, seed = 1234):
    """Train a Keras model on ShuffleSplit rounds and fill ``v`` / ``z``.

    For each of ``num_splits`` random splits a fresh model from
    ``build_model()`` is fitted, the weights stored at ``model_path`` are
    loaded back, and the ``pconvert``-ed validation and test predictions are
    accumulated into column ``cname`` of ``v`` and ``z``.

    Args:
        train3: training features, indexed with integer index arrays.
        y: training labels, indexed the same way as ``train3``.
        test3: test features passed to ``model.predict``.
        v: frame receiving out-of-fold predictions; mutated in place.
            Assumes its index labels equal the row positions of ``train3``
            -- TODO confirm.
        z: frame receiving averaged test predictions; mutated in place.
        num_splits: number of splits; also sets the validation share
            (``1 / num_splits``).
        cname: name of the output column and of the temporary weights file.
        build_model: zero-argument callable returning a new Keras model.
        seed: value passed to ``np.random.seed``.

    Returns:
        None.  Per-fold log-loss values and their mean/std are printed.
    """
    # Float zeros: the columns accumulate probabilities, and writing floats
    # into an integer column through ``.loc`` is deprecated in recent pandas.
    v[cname], z[cname] = 0.0, 0.0
    # How many times each training row landed in a validation split.
    hits = v[cname].copy()
    np.random.seed(seed)
    build_model().summary(line_length=120)
    model_path = '../data/working/' + cname + '_keras_model.h5'
    ss = model_selection.ShuffleSplit(n_splits=num_splits, random_state=11, test_size=1/num_splits)
    scores = []
    for n, (itrain, ival) in enumerate(ss.split(train3, y)):
        xtrain, xval = train3[itrain], train3[ival]
        ytrain, yval = y[itrain], y[ival]
        model = build_model()
        model.fit(
            xtrain, ytrain,
            batch_size=128,
            epochs=10000,
            validation_data=(xval, yval),
            verbose=0,
            # presumably these callbacks stop training early and checkpoint
            # to model_path -- verify in build_keras_fit_callbacks
            callbacks=build_keras_fit_callbacks(model_path),
            shuffle=True,
        )
        model.load_weights(model_path)
        p = model.predict(xval)
        v.loc[ival, cname] += pconvert(p).ravel()
        hits.loc[ival] += 1
        score = metrics.log_loss(y[ival], p)
        print(cname, 'fold %d: ' % (n + 1), score, now())
        scores.append(score)
        z[cname] += pconvert(model.predict(test3)).ravel()
        del model
        for i in range(3):
            gc.collect(i)
    os.remove(model_path)

    cv = np.array(scores)
    print(cv, cv.mean(), cv.std())
    z[cname] /= num_splits
    # ShuffleSplit validation sets can overlap, so a row may have been
    # scored several times: average over its actual number of appearances.
    # Rows that were never validated keep 0.
    v[cname] /= hits.clip(lower=1)
predict_2017_06_18_4.py 文件源码 项目:mlbootcamp_5 作者: ivan-filonov 项目源码 文件源码 阅读 26 收藏 0 点赞 0 评论 0
def rf1(train2, y, test2, v, z):
    """Accumulate bagged RandomForest class-1 probabilities into ``v`` and ``z``.

    Runs ``num_seeds`` x ``num_splits`` ShuffleSplit rounds.  Each round fits
    a forest on the train part, adds the ``pconvert``-ed validation
    predictions to ``v`` and the ``pconvert``-ed test predictions to ``z``.
    The column written is named after this function.

    Args:
        train2: training features, indexed with integer index arrays
            (presumably a NumPy array -- confirm against caller).
        y: training labels, indexed the same way as ``train2``.
        test2: test features passed to ``predict_proba``.
        v: frame receiving out-of-fold predictions; mutated in place.
            Assumes its index labels equal the row positions of ``train2``
            -- TODO confirm.
        z: frame receiving averaged test predictions; mutated in place.

    Returns:
        None.  Per-round log-loss values and their mean/std are printed.
    """
    cname = sys._getframe().f_code.co_name
    # Float zeros: the columns accumulate probabilities, and writing floats
    # into an integer column through ``.loc`` is deprecated in recent pandas.
    v[cname], z[cname] = 0.0, 0.0
    # How many times each training row landed in a validation split.
    hits = v[cname].copy()
    scores = []
    num_seeds = 3
    num_splits = 5
    base_seed = 13
    for seed in range(base_seed, base_seed + num_seeds):
        ss = model_selection.ShuffleSplit(n_splits=num_splits, random_state=seed)
        for n, (itrain, ival) in enumerate(ss.split(train2, y)):
            reg = ensemble.RandomForestClassifier(
                max_depth=9,
                random_state=seed,
                n_estimators=500,
                n_jobs=-2,
            )
            reg.fit(train2[itrain], y[itrain])
            p = reg.predict_proba(train2[ival])[:, 1]
            v.loc[ival, cname] += pconvert(p)
            hits.loc[ival] += 1
            score = metrics.log_loss(y[ival], p)
            print(cname, 'seed %d step %d: ' % (seed, n + 1), score, now())
            scores.append(score)
            z[cname] += pconvert(reg.predict_proba(test2)[:, 1])

    cv = np.array(scores)
    print(cv, cv.mean(), cv.std())
    z[cname] /= num_splits * num_seeds
    # ShuffleSplit validation sets overlap and need not cover every row, so
    # average each row over the rounds that actually scored it (dividing by
    # num_seeds is only right for a KFold-style partition).  Rows that were
    # never validated keep 0.
    v[cname] /= hits.clip(lower=1)
predict_2017_06_18_4.py 文件源码 项目:mlbootcamp_5 作者: ivan-filonov 项目源码 文件源码 阅读 21 收藏 0 点赞 0 评论 0
def et1(train2, y, test2, v, z):
    """Accumulate bagged ExtraTrees class-1 probabilities into ``v`` and ``z``.

    Runs ``num_seeds`` x ``num_splits`` ShuffleSplit rounds.  Each round fits
    an ``ExtraTreesClassifier`` on the train part, adds the ``pconvert``-ed
    validation predictions to ``v`` and the ``pconvert``-ed test predictions
    to ``z``.  The column written is named after this function.

    Args:
        train2: training features, indexed with integer index arrays
            (presumably a NumPy array -- confirm against caller).
        y: training labels, indexed the same way as ``train2``.
        test2: test features passed to ``predict_proba``.
        v: frame receiving out-of-fold predictions; mutated in place.
            Assumes its index labels equal the row positions of ``train2``
            -- TODO confirm.
        z: frame receiving averaged test predictions; mutated in place.

    Returns:
        None.  Per-round log-loss values and their mean/std are printed.
    """
    cname = sys._getframe().f_code.co_name
    # Float zeros: the columns accumulate probabilities, and writing floats
    # into an integer column through ``.loc`` is deprecated in recent pandas.
    v[cname], z[cname] = 0.0, 0.0
    # How many times each training row landed in a validation split.
    hits = v[cname].copy()
    scores = []
    num_seeds = 3
    num_splits = 5
    base_seed = 13
    for seed in range(base_seed, base_seed + num_seeds):
        ss = model_selection.ShuffleSplit(n_splits=num_splits, random_state=seed)
        for n, (itrain, ival) in enumerate(ss.split(train2, y)):
            reg = ensemble.ExtraTreesClassifier(
                max_depth=15,
                random_state=seed,
                n_estimators=2500,
                n_jobs=-2,
            )
            reg.fit(train2[itrain], y[itrain])
            p = reg.predict_proba(train2[ival])[:, 1]
            v.loc[ival, cname] += pconvert(p)
            hits.loc[ival] += 1
            score = metrics.log_loss(y[ival], p)
            print(cname, 'seed %d step %d: ' % (seed, n + 1), score, now())
            scores.append(score)
            z[cname] += pconvert(reg.predict_proba(test2)[:, 1])

    cv = np.array(scores)
    print(cv, cv.mean(), cv.std())
    z[cname] /= num_splits * num_seeds
    # ShuffleSplit validation sets overlap and need not cover every row, so
    # average each row over the rounds that actually scored it (dividing by
    # num_seeds is only right for a KFold-style partition).  Rows that were
    # never validated keep 0.
    v[cname] /= hits.clip(lower=1)
predict_2017_06_20_3.py 文件源码 项目:mlbootcamp_5 作者: ivan-filonov 项目源码 文件源码 阅读 27 收藏 0 点赞 0 评论 0
def rf1(train2, y, test2, v, z):
    """Accumulate bagged RandomForest class-1 probabilities into ``v`` and ``z``.

    Runs ``num_seeds`` x ``num_splits`` ShuffleSplit rounds.  Each round fits
    a forest on the train part, adds the ``pconvert``-ed validation
    predictions to ``v`` and the ``pconvert``-ed test predictions to ``z``.
    The column written is named after this function.

    Args:
        train2: training features, indexed with integer index arrays
            (presumably a NumPy array -- confirm against caller).
        y: training labels, indexed the same way as ``train2``.
        test2: test features passed to ``predict_proba``.
        v: frame receiving out-of-fold predictions; mutated in place.
            Assumes its index labels equal the row positions of ``train2``
            -- TODO confirm.
        z: frame receiving averaged test predictions; mutated in place.

    Returns:
        None.  Per-round log-loss values and their mean/std are printed.
    """
    cname = sys._getframe().f_code.co_name
    # Float zeros: the columns accumulate probabilities, and writing floats
    # into an integer column through ``.loc`` is deprecated in recent pandas.
    v[cname], z[cname] = 0.0, 0.0
    # How many times each training row landed in a validation split.
    hits = v[cname].copy()
    scores = []
    num_seeds = 3
    num_splits = 7
    base_seed = 13
    for seed in range(base_seed, base_seed + num_seeds):
        ss = model_selection.ShuffleSplit(n_splits=num_splits, random_state=seed)
        for n, (itrain, ival) in enumerate(ss.split(train2, y)):
            reg = ensemble.RandomForestClassifier(
                max_depth=9,
                random_state=seed,
                n_estimators=500,
                n_jobs=-2,
            )
            reg.fit(train2[itrain], y[itrain])
            p = reg.predict_proba(train2[ival])[:, 1]
            v.loc[ival, cname] += pconvert(p)
            hits.loc[ival] += 1
            score = metrics.log_loss(y[ival], p)
            print(cname, 'seed %d step %d: ' % (seed, n + 1), score, now())
            scores.append(score)
            z[cname] += pconvert(reg.predict_proba(test2)[:, 1])

    cv = np.array(scores)
    print(cv, cv.mean(), cv.std())
    z[cname] /= num_splits * num_seeds
    # ShuffleSplit validation sets overlap and need not cover every row, so
    # average each row over the rounds that actually scored it (dividing by
    # num_seeds is only right for a KFold-style partition).  Rows that were
    # never validated keep 0.
    v[cname] /= hits.clip(lower=1)
predict_2017_06_20_3.py 文件源码 项目:mlbootcamp_5 作者: ivan-filonov 项目源码 文件源码 阅读 18 收藏 0 点赞 0 评论 0
def et1(train2, y, test2, v, z):
    """Accumulate bagged ExtraTrees class-1 probabilities into ``v`` and ``z``.

    Runs ``num_seeds`` x ``num_splits`` ShuffleSplit rounds.  Each round fits
    an ``ExtraTreesClassifier`` on the train part, adds the ``pconvert``-ed
    validation predictions to ``v`` and the ``pconvert``-ed test predictions
    to ``z``.  The column written is named after this function.

    Args:
        train2: training features, indexed with integer index arrays
            (presumably a NumPy array -- confirm against caller).
        y: training labels, indexed the same way as ``train2``.
        test2: test features passed to ``predict_proba``.
        v: frame receiving out-of-fold predictions; mutated in place.
            Assumes its index labels equal the row positions of ``train2``
            -- TODO confirm.
        z: frame receiving averaged test predictions; mutated in place.

    Returns:
        None.  Per-round log-loss values and their mean/std are printed.
    """
    cname = sys._getframe().f_code.co_name
    # Float zeros: the columns accumulate probabilities, and writing floats
    # into an integer column through ``.loc`` is deprecated in recent pandas.
    v[cname], z[cname] = 0.0, 0.0
    # How many times each training row landed in a validation split.
    hits = v[cname].copy()
    scores = []
    num_seeds = 3
    num_splits = 7
    base_seed = 13
    for seed in range(base_seed, base_seed + num_seeds):
        ss = model_selection.ShuffleSplit(n_splits=num_splits, random_state=seed)
        for n, (itrain, ival) in enumerate(ss.split(train2, y)):
            reg = ensemble.ExtraTreesClassifier(
                max_depth=11,
                random_state=seed,
                n_estimators=1500,
                n_jobs=-2,
            )
            reg.fit(train2[itrain], y[itrain])
            p = reg.predict_proba(train2[ival])[:, 1]
            v.loc[ival, cname] += pconvert(p)
            hits.loc[ival] += 1
            score = metrics.log_loss(y[ival], p)
            print(cname, 'seed %d step %d: ' % (seed, n + 1), score, now())
            scores.append(score)
            z[cname] += pconvert(reg.predict_proba(test2)[:, 1])

    cv = np.array(scores)
    print(cv, cv.mean(), cv.std())
    z[cname] /= num_splits * num_seeds
    # ShuffleSplit validation sets overlap and need not cover every row, so
    # average each row over the rounds that actually scored it (dividing by
    # num_seeds is only right for a KFold-style partition).  Rows that were
    # never validated keep 0.
    v[cname] /= hits.clip(lower=1)


问题


面经


文章

微信
公众号

扫码关注公众号