def _xgbout2_fit_and_score(model, Xtrain, Ytrain, Xtest, Ytest):
    '''Fit `model` on the training split and return (accuracy, AUC) on the test split.'''
    fitted = model.fit(Xtrain, Ytrain)
    proba = fitted.predict_proba(Xtest)
    labels = fitted.predict(Xtest)
    acc = accuracy_score(Ytest, labels)
    # Column 1 of predict_proba is used as the score for the ROC curve.
    auc = roc_auc_score(Ytest, proba[:, 1])
    return acc, auc


def XGBOUT2(bp, all_samples, train_samp, Xcoords, Ycoords, Zcoords, k, threshold, nthread, bootstrap=True):
    '''Take a CI test data-set and compare two XGBoost classifiers built from it.

    The rows of `all_samples` are optionally resampled with replacement, then
    handed to `CI_sampler_conditional_kNN` to obtain a train/test
    classification problem. One classifier is trained on every feature column,
    a second one on the columns from index `len(Xcoords)` onward, and their
    test AUC / accuracy are compared.

    Args:
        bp: mapping with the keys 'n_estimator', 'max_depth' and
            'colsample_bytree', forwarded to `xgb.XGBClassifier`.
        all_samples: 2-D array indexed as `all_samples[rows, cols]`.
        train_samp: forwarded unchanged to `CI_sampler_conditional_kNN`.
        Xcoords, Ycoords, Zcoords: column indices selecting the three
            variable groups inside `all_samples`.
        k: forwarded unchanged to `CI_sampler_conditional_kNN`.
        threshold: margin by which the full-feature AUC must exceed the
            reduced-feature AUC for the first returned element to be 0.0.
        nthread: thread count handed to `xgb.XGBClassifier`.
        bootstrap: when true, rows are resampled with replacement before the
            data-set is built; when false the rows are used as given.

    Returns:
        A list `[flag, AUC1 - AUC2, AUC2 - 0.5, acc1 - acc2, acc2 - 0.5]`
        where `flag` is 0.0 if `AUC1 > AUC2 + threshold` and 1.0 otherwise
        (1 = full model, 2 = reduced model).
        NOTE(review): presumably flag 1.0 means "conditional independence not
        rejected" -- confirm against the caller.
    '''
    num_samp = len(all_samples)
    if bootstrap:
        # Calling seed() without an argument re-seeds from a fresh entropy
        # source. NOTE(review): presumably so that parallel workers do not
        # draw identical resamples -- confirm against the caller.
        np.random.seed()
        random.seed()
        I = np.random.choice(num_samp, size=num_samp, replace=True)
        samples = all_samples[I, :]
    else:
        samples = all_samples

    # Build the classification problem from `samples` (the possibly resampled
    # rows). Slicing `all_samples` here would silently discard the bootstrap
    # draw and make the `bootstrap` flag a no-op.
    Xtrain, Ytrain, Xtest, Ytest, _ = CI_sampler_conditional_kNN(
        samples[:, Xcoords], samples[:, Ycoords], samples[:, Zcoords], train_samp, k)

    model = xgb.XGBClassifier(
        nthread=nthread, learning_rate=0.02, n_estimators=bp['n_estimator'],
        max_depth=bp['max_depth'], min_child_weight=1, gamma=0, subsample=0.8,
        colsample_bytree=bp['colsample_bytree'], objective='binary:logistic',
        scale_pos_weight=1, seed=11)

    # Model 1: every feature column.
    acc1, AUC1 = _xgbout2_fit_and_score(model, Xtrain, Ytrain, Xtest, Ytest)

    # Model 2: the same estimator refitted without the first len(Xcoords)
    # columns. NOTE(review): assumes those leading columns hold the X
    # features -- confirm against CI_sampler_conditional_kNN.
    first_kept = len(Xcoords)
    acc2, AUC2 = _xgbout2_fit_and_score(
        model, Xtrain[:, first_kept:], Ytrain, Xtest[:, first_kept:], Ytest)

    flag = 0.0 if AUC1 > AUC2 + threshold else 1.0
    return [flag, AUC1 - AUC2, AUC2 - 0.5, acc1 - acc2, acc2 - 0.5]
# Comment list
# Table of contents