import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import roc_auc_score, roc_curve


def calc_auc(y_pred_proba, labels, exp_run_folder, classifier, fold):
    auc = roc_auc_score(labels, y_pred_proba)
    fpr, tpr, thresholds = roc_curve(labels, y_pred_proba)
    curve_roc = np.array([fpr, tpr])
    datafile_id = open(exp_run_folder + '/data/roc_{}_{}.txt'.format(classifier, fold), 'w+')
    np.savetxt(datafile_id, curve_roc)
    datafile_id.close()
    plt.plot(fpr, tpr, label='ROC curve: AUC={0:0.2f}'.format(auc))
    plt.xlabel('1-Specificity')
    plt.ylabel('Sensitivity')
    plt.ylim([0.0, 1.05])
    plt.xlim([0.0, 1.0])
    plt.grid(True)
    plt.title('ROC Fold {}'.format(fold))
    plt.legend(loc="lower left")
    plt.savefig(exp_run_folder + '/data/roc_{}_{}.pdf'.format(classifier, fold), format='pdf')
    return auc
Python roc_auc_score() usage examples (source code)
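For orientation, here is a minimal, self-contained sketch of the basic call the snippets below revolve around; the toy labels and scores are illustrative only and are not taken from any of the listed projects.

import numpy as np
from sklearn.metrics import roc_auc_score

# Toy binary ground truth and predicted scores (illustrative values only).
y_true = np.array([0, 0, 1, 1])
y_score = np.array([0.1, 0.4, 0.35, 0.8])

# roc_auc_score takes the true labels first, then the scores/probabilities,
# and returns the area under the ROC curve (0.75 for this toy example).
print(roc_auc_score(y_true, y_score))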
def classification_metrics(y, y_pred, threshold):
    metrics = {}
    metrics['threshold'] = threshold_from_predictions(y, y_pred, 0)
    metrics['np.std(y_pred)'] = np.std(y_pred)
    metrics['positive_frac_batch'] = float(np.count_nonzero(y == True)) / len(y)
    denom = np.count_nonzero(y == False)
    num = np.count_nonzero(np.logical_and(y == False, y_pred >= threshold))
    if denom > 0:
        metrics['fpr'] = float(num) / float(denom)
    if any(y) and not all(y):
        metrics['auc'] = roc_auc_score(y, y_pred)
        y_pred_bool = y_pred >= threshold
        if (any(y_pred_bool) and not all(y_pred_bool)):
            metrics['precision'] = precision_score(np.array(y, dtype=np.float32), y_pred_bool)
            metrics['recall'] = recall_score(y, y_pred_bool)
    return metrics
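A hedged usage sketch for the function above. threshold_from_predictions is a project-specific helper that is not included in this listing, so the stub below stands in for it purely for illustration; precision_score, recall_score and roc_auc_score come from sklearn.metrics.

import numpy as np
from sklearn.metrics import precision_score, recall_score, roc_auc_score

# Stub for the project helper that is not shown on this page (illustration only).
def threshold_from_predictions(y, y_pred, fpr_target):
    return 0.5

y = np.array([False, True, False, True, False, True])
y_pred = np.array([0.2, 0.9, 0.4, 0.7, 0.1, 0.8])
stats = classification_metrics(y, y_pred, threshold=0.5)
print(stats['auc'], stats.get('fpr'), stats.get('precision'))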
def auc_score(res_list):
    gp_list = np.array([])
    anno_list = np.array([])
    for res in res_list:
        g_pred = res.g_pred
        anno = res.annotation
        if g_pred.shape[-1] < anno.shape[-1]:
            anno = np.delete(anno, range(g_pred.shape[-1], anno.shape[-1]), axis=-1)
        elif g_pred.shape[-1] > anno.shape[-1]:
            g_pred = np.delete(g_pred, range(anno.shape[-1], g_pred.shape[-1]), axis=-1)
        gp_list = g_pred.T if len(gp_list) == 0 else np.append(gp_list, g_pred.T, axis=0)
        anno_list = anno.T if len(anno_list) == 0 else np.append(anno_list, anno.T, axis=0)

    assert(gp_list.shape == anno_list.shape)
    from sklearn.metrics import roc_auc_score
    class_auc = roc_auc_score(anno_list, gp_list, average=None)
    print('AUC of Classes:')
    print(class_auc)
    all_micro_auc = roc_auc_score(anno_list, gp_list, average='micro')
    print('Total micro AUC: {}'.format(all_micro_auc))
    all_macro_auc = roc_auc_score(anno_list, gp_list, average='macro')
    print('Total macro AUC: {}'.format(all_macro_auc))
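The snippet above reports per-class, micro- and macro-averaged AUC; below is a small standalone illustration of those three averaging modes on toy multi-label data (all values invented for the example).

import numpy as np
from sklearn.metrics import roc_auc_score

# Toy multi-label targets (n_samples x n_classes) and scores; every class has
# both positives and negatives, which roc_auc_score requires.
anno = np.array([[1, 0, 0],
                 [0, 1, 1],
                 [1, 1, 0],
                 [0, 0, 1]])
scores = np.array([[0.8, 0.2, 0.1],
                   [0.7, 0.6, 0.9],
                   [0.6, 0.8, 0.2],
                   [0.1, 0.4, 0.7]])

print(roc_auc_score(anno, scores, average=None))     # one AUC per class
print(roc_auc_score(anno, scores, average='micro'))  # pool all label/score pairs
print(roc_auc_score(anno, scores, average='macro'))  # unweighted mean of per-class AUCs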
def metrics(self, X, y):
    metrics = {}
    y_pred_pair, loss = self.predict_proba_with_loss(X, y)
    y_pred = y_pred_pair[:,1]  ## From softmax pair to prob of catastrophe
    metrics['loss'] = loss
    threshold = self.threshold_from_data(X, y)
    metrics['threshold'] = threshold
    metrics['np.std(y_pred)'] = np.std(y_pred)
    denom = np.count_nonzero(y == False)
    num = np.count_nonzero(np.logical_and(y == False, y_pred >= threshold))
    metrics['fpr'] = float(num) / float(denom)
    if any(y) and not all(y):
        metrics['auc'] = roc_auc_score(y, y_pred)
        y_pred_bool = y_pred >= threshold
        if (any(y_pred_bool) and not all(y_pred_bool)):
            metrics['precision'] = precision_score(np.array(y, dtype=np.float32), y_pred_bool)
            metrics['recall'] = recall_score(y, y_pred_bool)
    return metrics
def score(self, profiles, bin_sites):
    """Compute AUC ROC from predictions."""
    app_profiles = list()
    app_true_vals = list()
    for k, profile in profiles.iteritems():
        app_profiles.append(profile)
        true_vals = np.zeros(len(profile))
        bins = bin_sites.get(k, False)
        if bins is not False:
            for s, e, _ in bins:
                true_vals[s:e] = 1
        app_true_vals.append(true_vals)
    vec_profiles = np.concatenate(app_profiles)
    vec_true_vals = np.concatenate(app_true_vals)
    roc_auc = roc_auc_score(vec_true_vals, vec_profiles)
    return roc_auc
def getAccuracyAucOnAllTasks(self, task_list):
    all_task_Y = []
    all_preds = []
    for i in range(len(task_list)):
        preds, task_Y = self.getPredsTrueOnOneTask(task_list, i)
        if preds is None:
            # Skipping task because it does not have valid data
            continue
        if len(task_Y) > 0:
            all_task_Y.extend(task_Y)
            all_preds.extend(preds)
    if not helper.containsEachLabelType(all_preds):
        print "for some bizarre reason, the preds for all tasks are the same class"
        print "preds", all_preds
        print "true_y", all_task_Y
        auc = np.nan
    else:
        auc = roc_auc_score(all_task_Y, all_preds)
    acc = hblr.getBinaryAccuracy(all_preds, all_task_Y)
    return acc, auc
def getAccuracyAucOnOneTask(self, task_list, task, debug=False):
    X_t, y_t = self.extractTaskData(task_list, task)
    if len(X_t) == 0:
        return np.nan, np.nan
    preds = self.internal_predict(X_t, int(task))
    if debug:
        print "y_t:", y_t
        print "preds:", preds
    acc = helper.getBinaryAccuracy(preds, y_t)
    if len(y_t) > 1 and helper.containsEachSVMLabelType(y_t) and helper.containsEachSVMLabelType(preds):
        auc = roc_auc_score(y_t, preds)
    else:
        auc = np.nan
    return acc, auc
roc_auc.py — project: deep-mil-for-whole-mammogram-classification, author: wentaozhu
def on_epoch_end(self, epoch, logs={}):
    if epoch % self.interval == 0:
        y_pred = self.model.predict(self.X_val, verbose=0)
        #print(np.sum(y_pred[:,1]))
        #y_true = np.argmax(self.y_val, axis=1)
        #y_pred = np.argmax(y_pred, axis=1)
        #print(y_true.shape, y_pred.shape)
        if self.mymil:
            score = roc_auc_score(self.y_val.max(axis=1), y_pred.max(axis=1))
        else:
            score = roc_auc_score(self.y_val[:,1], y_pred[:,1])
        print("interval evaluation - epoch: {:d} - auc: {:.2f}".format(epoch, score))
        if score > self.auc:
            self.auc = score
            for f in os.listdir('./'):
                if f.startswith(self.filepath + 'auc'):
                    os.remove(f)
            self.model.save(self.filepath + 'auc' + str(score) + 'ep' + str(epoch) + '.hdf5')
roc_auc.py — project: deep-mil-for-whole-mammogram-classification, author: wentaozhu
def perform(self, node, inputs, output_storage):
    """
    Calculate ROC AUC score.

    Parameters
    ----------
    node : Apply instance
        Symbolic inputs and outputs.
    inputs : list
        Sequence of inputs.
    output_storage : list
        List of mutable 1-element lists.
    """
    if roc_auc_score is None:
        raise RuntimeError("Could not import from sklearn.")
    y_true, y_score = inputs
    try:
        roc_auc = roc_auc_score(y_true, y_score)
    except ValueError:
        roc_auc = np.nan
    #rvalue = np.array((roc_auc, prec, reca, f1))
    #[0][0]
    output_storage[0][0] = theano._asarray(roc_auc, dtype=config.floatX)
def setUp(self):
    os.putenv("KMP_DUPLICATE_LIB_OK", "TRUE")
    self.X_class, self.y_class = datasets.make_classification(random_state=42)
    self.X_reg, self.y_reg = datasets.make_regression(random_state=42)
    self.classification_optimizers = [XGBoostOptimizer, RandomForestOptimizer]
    self.regression_optimizers = [XGBoostOptimizer, RandomForestOptimizer]
    self.class_scorer = Scorer("auc_error", lambda y_pred, y_true: 1 - metrics.roc_auc_score(y_pred, y_true))
    self.reg_scorer = Scorer("mse", metrics.mean_squared_error)
    self.classification_task_split = \
        Task("class_split", self.X_class, self.y_class, "classification", test_size=0.1, random_state=42)
    self.regression_task_split = \
        Task("reg_split", self.X_class, self.y_class, "regression", test_size=0.1, random_state=42)
    self.classification_task_cv = \
        Task("class_cv", self.X_reg, self.y_reg, "classification", cv=5, random_state=42)
    self.regression_task_cv = \
        Task("reg_cv", self.X_reg, self.y_reg, "regression", cv=5, random_state=42)
def classifier_accuracy_report(self, prediction_vector, threshold=0.5):
    """ Determine AUC and other metrics, write report.

    prediction_vector: vector of booleans (or outcome
    probabilities) of length n_subjects,
    e.g. self.point_predictions, self.ensemble_probabilities()...
    If this has dtype other than bool, prediction_vector > threshold
    is used for the confusion matrix.

    Returns: one string (multiple lines joined with \n, including
    trailing newline) containing a formatted report.
    """
    auc = roc_auc_score(self.model.data.y.astype(float), prediction_vector.astype(float))
    if not (prediction_vector.dtype == np.bool):
        prediction_vector = prediction_vector >= threshold
    conf = confusion_matrix(self.model.data.y, prediction_vector)
    lines = ['AUC: %.3f' % auc,
             'Confusion matrix: \n\t%s' % str(conf).replace('\n', '\n\t')]
    return '\n'.join(lines) + '\n'
########################################
# BAYES-FACTOR-BASED METHODS
def eval_semantics(scores, gt, args):
    from sklearn.metrics import roc_auc_score
    num_semantics = gt.shape[1]
    acc, auc = np.nan * np.zeros((num_semantics,)), np.nan * np.zeros((num_semantics,))
    if args.semantics == ATTRIBUTES:
        for s, (pred, lbl) in enumerate(zip(scores.T, gt.T)):
            acc[s] = (pred * (lbl - 0.5) > 0).astype(float).mean()
            if sum(lbl == 0) > 0 and sum(lbl == 1) > 0:
                auc[s] = roc_auc_score(lbl, pred)
    else:
        for s, (pred, lbl) in enumerate(zip(scores, gt.T)):
            acc[s] = (pred.argmax(axis=1) == lbl).astype(float).mean()
            onehot = np.zeros(pred.shape)
            for i, l in enumerate(lbl):
                onehot[i, int(l)] = 1
            if (onehot.sum(axis=0) == 0).sum() == 0:
                auc[s] = roc_auc_score(onehot, pred)
    return acc, auc
def test():
    y = []
    yp = []
    fi = open(sys.argv[1], 'r')
    for line in fi:
        data = ints(line.replace(":1", "").split())
        clk = data[1]
        mp = data[2]
        fsid = 3  # feature start id
        pred = 0.0
        for i in range(fsid, len(data)):
            feat = data[i]
            if feat in featWeight:
                pred += featWeight[feat]
        pred = sigmoid(pred)
        y.append(clk)
        yp.append(pred)
    fi.close()
    auc = roc_auc_score(y, yp)
    rmse = math.sqrt(mean_squared_error(y, yp))
    print str(round) + '\t' + str(auc) + '\t' + str(rmse)
def report_metrics(y_dset, y_pred, batch_size, dset='Val'):
    # Print additional metrics involving predictions
    n_rows = (y_dset.shape[0] / batch_size) * batch_size
    y_true = y_dset[0:n_rows, :].flatten()
    y_pred = y_pred.flatten()
    val_ap = average_precision_score(y_true, y_pred)
    val_roc = roc_auc_score(y_true, y_pred)
    n = y_true.size
    n_pos = y_true.sum()
    idx_sorted = np.argsort(-y_pred)
    val_rec = []
    logging.info(dset + "-AP {:.6f}".format(val_ap))
    logging.info(dset + "-ROC {:.6f}".format(val_roc))
    for i, v in enumerate([10, 25, 50, 75, 100]):
        tp = y_true[idx_sorted[:int(v * n / 100)]].sum()
        val_rec.append(tp * 1.0 / n_pos)
        logging.info(dset + "-R{} {:.6f}".format(v, val_rec[i]))
    return val_ap, val_rec[2]
# ############################## Main program #################################
def plot_roc_curve(y_true, y_score, ax=None):
    '''
    Plot the Receiver Operating Characteristic curve, including the
    Area Under the Curve (AUC) score.

    Parameters
    ----------
    y_true : array
    y_score : array
    ax : matplotlib.axes, defaults to new axes

    Returns
    -------
    ax : matplotlib.axes
    '''
    ax = ax or plt.axes()
    auc = metrics.roc_auc_score(y_true, y_score)
    fpr, tpr, _ = metrics.roc_curve(y_true, y_score)
    ax.plot(fpr, tpr)
    ax.annotate('AUC: {:.2f}'.format(auc), (.8, .2))
    ax.plot([0, 1], [0, 1], linestyle='--', color='k')
    return ax
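A small usage sketch for the helper above; it assumes the module-level imports the snippet relies on (matplotlib.pyplot as plt and sklearn's metrics module), and the arrays are invented for the example.

import numpy as np
import matplotlib.pyplot as plt
from sklearn import metrics

y_true = np.array([0, 1, 1, 0, 1, 0])
y_score = np.array([0.2, 0.8, 0.6, 0.4, 0.9, 0.3])

ax = plot_roc_curve(y_true, y_score)  # a new axes is created when ax is None
ax.set_xlabel('False positive rate')
ax.set_ylabel('True positive rate')
plt.show()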
def cross_validate(classifier, n_folds=5):
    '''Custom cross-validation module I always use.'''
    train_X = classifier['train_X']
    train_y = classifier['train_y']
    model = classifier['model']
    score = 0.0
    skf = KFold(n_splits=n_folds)
    for train_index, test_index in skf.split(train_X):
        X_train, X_test = train_X[train_index], train_X[test_index]
        y_train, y_test = train_y[train_index], train_y[test_index]
        clf = model.fit(X_train, y_train)
        pred = clf.predict_proba(X_test)[:, 1]
        #print 'cross', roc_auc_score(y_test, pred)
        score = score + roc_auc_score(y_test, pred)
    return score / n_folds
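A usage sketch for the dict-based interface above, assuming KFold and roc_auc_score are imported at module level as the snippet implies; the dataset and model below are placeholders, not part of the original project.

from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import KFold
from sklearn.metrics import roc_auc_score

# Synthetic data and a probability-producing model, for illustration only.
X, y = make_classification(n_samples=300, random_state=0)
classifier = {'train_X': X, 'train_y': y, 'model': LogisticRegression()}
mean_auc = cross_validate(classifier, n_folds=5)
print('mean AUC over folds: {:.3f}'.format(mean_auc))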
def analyzeResult_temp(data, model, DataVecs):
    predict = model.predict(DataVecs)
    data['predict'] = predict
    print ("Accuracy: %f %%" % (100. * sum(data["label"] == data["predict"]) / len(data["label"])))
    answer1 = data[data["label"] == 1]
    answer2 = data[data["label"] == 0]
    print ("Positive Accuracy: %f %%" % (100. * sum(answer1["label"] == answer1["predict"]) / len(answer1["label"])))
    print ("Negative Accuracy: %f %%" % (100. * sum(answer2["label"] == answer2["predict"]) / len(answer2["label"])))
    try:
        result_auc = model.predict_proba(DataVecs)
        print ("Roc:%f\nAUPR:%f\n" % (roc_auc_score(data["label"], result_auc[:, 1]),
                                      average_precision_score(data["label"], result_auc[:, 1])))
        print("Precision:%f\nRecall:%f\nF1score:%f\nMCC:%f\n" % (precision_score(data["label"], data["predict"]),
                                                                 recall_score(data["label"], data["predict"]),
                                                                 f1_score(data["label"], data["predict"]),
                                                                 matthews_corrcoef(data["label"], data["predict"])))
    except:
        print "ROC unavailable"
# Performance evaluation and result analysis using adjusted thresholds
def analyzeResult(data, model, DataVecs, threshold):
    predict = model.predict_proba(DataVecs)[:, 1]
    True, False = 1, 0
    data['predict'] = (predict > threshold)
    print ("Accuracy: %f %%" % (100. * sum(data["label"] == data["predict"]) / len(data["label"])))
    answer1 = data[data["label"] == 1]
    answer2 = data[data["label"] == 0]
    print ("Positive Accuracy: %f %%" % (100. * sum(answer1["label"] == answer1["predict"]) / len(answer1["label"])))
    print ("Negative Accuracy: %f %%" % (100. * sum(answer2["label"] == answer2["predict"]) / len(answer2["label"])))
    try:
        result_auc = model.predict_proba(DataVecs)
        print ("Roc:%f\nAUPR:%f\n" % (roc_auc_score(data["label"], result_auc[:, 1]),
                                      average_precision_score(data["label"], result_auc[:, 1])))
        print("Precision:%f\nRecall:%f\nF1score:%f\nMCC:%f\n" % (precision_score(data["label"], data["predict"]),
                                                                 recall_score(data["label"], data["predict"]),
                                                                 f1_score(data["label"], data["predict"]),
                                                                 matthews_corrcoef(data["label"], data["predict"])))
    except:
        print "ROC unavailable"
# Performance evaluation
def print_evaluation_result(clf, bags_test, args):
    pred_score = np.array([clf(B.data()) for B in bags_test])
    pred_label = np.array([1 if score >= 0 else -1 for score in pred_score])
    true_label = np.array([B.y for B in bags_test])
    a = accuracy (pred_label, true_label)   # accuracy
    p = precision(pred_label, true_label)   # precision
    r = recall   (pred_label, true_label)   # recall
    f = f_score  (pred_label, true_label)   # F-score
    auc = metrics.roc_auc_score((true_label+1)/2, pred_score)

    if not args.aucplot:
        sys.stdout.write("""# accuracy,precision,recall,f-score,ROC-AUC
{:.3f},{:.3f},{:.3f},{:.3f},{:.3f}\n""".format(a, p, r, f, auc))
        sys.stdout.flush()
    else:
        sys.stdout.write("""# accuracy,precision,recall,f-score,ROC-AUC
# {:.3f},{:.3f},{:.3f},{:.3f},{:.3f}\n""".format(a, p, r, f, auc))
        sys.stdout.flush()
        np.savetxt(sys.stdout.buffer, np.c_[pred_score, true_label])
s12_run_xgboost_only_train_create.py — project: KAGGLE_AVITO_2016, author: ZFTurbo
def run_train_with_model(train, features, model_path):
    start_time = time.time()
    gbm = xgb.Booster()
    gbm.load_model(model_path)
    print("Validating...")
    check = gbm.predict(xgb.DMatrix(train[features]))
    score = roc_auc_score(train['isDuplicate'].values, check)
    validation_df = pd.DataFrame({'itemID_1': train['itemID_1'].values, 'itemID_2': train['itemID_2'].values,
                                  'isDuplicate': train['isDuplicate'].values, 'probability': check})
    print('AUC score value: {:.6f}'.format(score))
    imp = get_importance(gbm, features)
    print('Importance array: ', imp)
    print('Prediction time: {} minutes'.format(round((time.time() - start_time) / 60, 2)))
    return validation_df, score
def classification():
    # Generate a random binary classification problem.
    X, y = make_classification(n_samples=350, n_features=15, n_informative=10,
                               random_state=1111, n_classes=2,
                               class_sep=1., n_redundant=0)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.15,
                                                        random_state=1111)
    model = GradientBoostingClassifier(n_estimators=50, max_depth=4,
                                       max_features=8, learning_rate=0.1)
    model.fit(X_train, y_train)
    predictions = model.predict(X_test)
    print(predictions)
    print(predictions.min())
    print(predictions.max())
    print('classification, roc auc score: %s'
          % roc_auc_score(y_test, predictions))
def test_mlp():
    y_train_onehot = one_hot(y_train)
    y_test_onehot = one_hot(y_test)
    model = NeuralNet(
        layers=[
            Dense(256, Parameters(init='uniform', regularizers={'W': L2(0.05)})),
            Activation('relu'),
            Dropout(0.5),
            Dense(128, Parameters(init='normal', constraints={'W': MaxNorm()})),
            Activation('relu'),
            Dense(2),
            Activation('softmax'),
        ],
        loss='categorical_crossentropy',
        optimizer=Adadelta(),
        metric='accuracy',
        batch_size=64,
        max_epochs=25,
    )
    model.fit(X_train, y_train_onehot)
    predictions = model.predict(X_test)
    assert roc_auc_score(y_test_onehot[:, 0], predictions[:, 0]) >= 0.95
def train_classifier(x_train, y_train, x_cv, y_cv):
    clf = RandomForestClassifier(n_estimators=100)
    print 'starting fit'
    # excluding the patient_id column from the fit and prediction (patient_id?)
    clf.fit(x_train[::5], y_train[::5])
    print 'starting pred'
    y_pred = np.zeros(x_cv.shape[0])
    for i in xrange(4):
        y_pred[i::4] = clf.predict_proba(x_cv[i::4])[:, 1]
    if y_cv is not None:
        print roc_auc_score(y_cv, y_pred)
    return y_pred, clf