def plot_ROC(test_labels, test_predictions):
fpr, tpr, thresholds = metrics.roc_curve(
test_labels, test_predictions, pos_label=1)
auc = "%.2f" % metrics.auc(fpr, tpr)
title = 'ROC Curve, AUC = '+str(auc)
with plt.style.context(('ggplot')):
fig, ax = plt.subplots()
ax.plot(fpr, tpr, "#000099", label='ROC curve')
ax.plot([0, 1], [0, 1], 'k--', label='Baseline')
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.legend(loc='lower right')
plt.title(title)
return fig
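# A minimal usage sketch for plot_ROC above (not part of the original project).
# The function body relies on module-level `metrics` (sklearn.metrics) and `plt`
# (matplotlib.pyplot); the label and score arrays here are illustrative only.
import numpy as np
import matplotlib.pyplot as plt
from sklearn import metrics

example_labels = np.array([0, 0, 1, 1, 0, 1])               # hypothetical ground truth
example_scores = np.array([0.1, 0.4, 0.35, 0.8, 0.2, 0.9])  # hypothetical classifier scores
roc_fig = plot_ROC(example_labels, example_scores)
roc_fig.savefig('roc_example.png')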
def getAccuracyAucOnOneTask(self, task_list, task, debug=False):
X_t, y_t = self.extractTaskData(task_list,task)
if len(X_t) == 0:
return np.nan, np.nan
preds = self.internal_predict(X_t, int(task))
if debug:
print "y_t:", y_t
print "preds:", preds
acc = helper.getBinaryAccuracy(preds,y_t)
if len(y_t) > 1 and helper.containsEachSVMLabelType(y_t) and helper.containsEachSVMLabelType(preds):
auc = roc_auc_score(y_t, preds)
else:
auc = np.nan
return acc, auc
def getAUC(self,test_tasks):
mean_tpr = 0.0
mean_fpr = np.linspace(0, 1, 100)
for t in range(self.n_tasks):
X_t, Y_t = self.extractTaskData(self.train_tasks,t)
X_test_t, Y_test_t = self.extractTaskData(test_tasks, t)
overallKernel = self.constructKernelFunction(t)
self.classifiers[t] = SVC(C=self.C, kernel=overallKernel, probability=True, max_iter=self.max_iter_internal, tol=self.tolerance)
probas_ = self.classifiers[t].fit(X_t, Y_t).predict_proba(X_test_t)
fpr, tpr, thresholds = roc_curve(Y_test_t, probas_[:, 1])
mean_tpr += interp(mean_fpr, fpr, tpr)
mean_tpr[0] = 0.0
mean_tpr /= self.n_tasks
mean_tpr[-1] = 1.0
mean_auc = auc(mean_fpr, mean_tpr)
return mean_auc, mean_fpr, mean_tpr
def pred_accuracy(y_true, y_pred):
y_true = sp.copy(y_true)
if len(sp.unique(y_true))==2:
        print('dichotomous trait, calculating AUC')
y_min = y_true.min()
y_max = y_true.max()
        if y_min != 0 or y_max != 1:
            y_true[y_true == y_min] = 0
            y_true[y_true == y_max] = 1
fpr, tpr, thresholds = metrics.roc_curve(y_true, y_pred)
auc = metrics.auc(fpr, tpr)
return auc
else:
        print('continuous trait, calculating COR')
        cor = sp.corrcoef(y_true, y_pred)[0, 1]
return cor
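# Usage sketch for pred_accuracy above (illustrative values; the body assumes
# module-level `sp` = scipy and `metrics` = sklearn.metrics):
#
#     pred_accuracy([0, 1, 1, 0, 1], [0.2, 0.7, 0.6, 0.3, 0.9])   # binary trait -> AUC
#     pred_accuracy([1.2, 0.4, 2.3], [1.0, 0.5, 2.0])             # continuous trait -> correlation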
def get_pr(reference_frames,output_frames,mode='type',pr_resolution=100):
# filter output by confidence
confidence = collect_confidence(output_frames)
conf_order = []
step = 100/float(pr_resolution)
for j in range(1,pr_resolution+1):
conf_order.append( np.percentile(confidence, j*step) )
conf_order = [-1] + conf_order + [2]
# get curve
params = []
for threshold in conf_order:
params.append( [ reference_frames, output_frames, confidence, threshold, mode ] )
all_tp, all_fp, all_fn, all_prec, all_rec = zip(*pool.map(single_point, params))
all_prec = list(all_prec) #+ [0]
all_rec = list(all_rec) #+ [1]
all_rec, all_prec = zip(*sorted(zip(all_rec, all_prec)))
AUC = metrics.auc(all_rec, all_prec)
return all_rec, all_prec, AUC
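# Note: `collect_confidence`, `single_point` and `pool` are assumed to be defined
# elsewhere in the original module; `pool` would typically be a module-level
# multiprocessing.Pool, e.g. (a sketch of that setup, not the original code):
#
#     from multiprocessing import Pool
#     pool = Pool()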
# create complete output for 1 mode --------------------------------------------
def addFold(self, fold_id, true_labels, predicted_proba, predicted_scores):
if len(true_labels) == 0:
return
if self.probabilist_model:
scores = predicted_proba
else:
scores = predicted_scores
fpr, tpr, thresholds = roc_curve(true_labels, scores)
self.mean_tpr += interp(self.mean_fpr, fpr, tpr)
self.thresholds = interp(self.mean_fpr, fpr, thresholds)
self.mean_tpr[0] = 0.0
self.thresholds[0] = 1.0
self.thresholds[-1] = 0.0
roc_auc = auc(fpr, tpr)
if self.num_folds > 1:
self.ax1.plot(fpr, tpr, lw = 1,
label = 'ROC fold %d (area = %0.2f)' % (fold_id, roc_auc))
else:
self.ax1.plot(fpr, tpr, lw = 3,
color = colors_tools.getLabelColor('all'),
label = 'ROC (area = %0.2f)' % (roc_auc))
def plot(self, output_file):
self.ax1.plot([0, 1], [0, 1], '--', lw = 1,
color = (0.6, 0.6, 0.6), label = 'Luck')
if self.num_folds > 1:
self.mean_tpr /= self.num_folds
self.mean_tpr[-1] = 1.0
mean_auc = auc(self.mean_fpr, self.mean_tpr)
self.ax1.plot(self.mean_fpr, self.mean_tpr, 'k--',
label = 'Mean ROC (area = %0.2f)' % mean_auc, lw = 2)
self.ax1.set_xlim([-0.05, 1.05])
self.ax1.set_ylim([-0.05, 1.05])
self.ax1.set_xlabel('False Positive Rate')
self.ax1.set_ylabel('True Positive Rate')
self.ax1.set_title('ROC Curve')
self.ax1.legend(loc = 'lower right')
self.fig.savefig(output_file)
plt.close(self.fig)
def toJson(self, f):
perf = {}
if self.auc:
perf['auc'] = {'mean': str(int(self.auc_mean*10000)/100) + '%',
'std': int(self.auc_std*10000)/10000}
if self.probabilist_model:
perf['thresholds'] = [{} for x in self.thresholds]
for t in self.thresholds:
for v in self.perf_threshold_summary[t].index:
perf['thresholds'][t][v] = {}
perf['thresholds'][t][v]['mean'] = str(int(self.perf_threshold_summary[t].loc[v, 'mean']*10000)/100)
perf['thresholds'][t][v]['mean'] += '%'
perf['thresholds'][t][v]['std'] = int(self.perf_threshold_summary[t].loc[v, 'std']*10000)/10000
else:
for v in self.perf_threshold_summary.index:
perf[v] = {}
perf[v]['mean'] = floats_tools.toPercentage(self.perf_threshold_summary.loc[v, 'mean'])
perf[v]['std'] = floats_tools.trunc(self.perf_threshold_summary.loc[v, 'std'])
json.dump(perf, f, indent = 2)
def get_auc(outputs, probas):
''' AUC is a common metric for binary classification
methods by comparing true & false positive rates
Args
----
outputs : numpy array
true outcomes (OxTxN)
probas : numpy array
predicted probabilities (OxTxN)
Returns
-------
    auc : float
'''
fpr, tpr, _ = roc_curve(outputs, probas[:, 1])
return auc(fpr, tpr)
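# Usage sketch for get_auc above (assumes module-level `from sklearn.metrics import
# roc_curve, auc`, which the body relies on). `probas` holds one column per class,
# so column 1 is the positive-class probability:
#
#     outputs = np.array([0, 1, 1, 0])
#     probas  = np.array([[0.8, 0.2], [0.3, 0.7], [0.4, 0.6], [0.9, 0.1]])
#     get_auc(outputs, probas)   # -> 1.0 for these illustrative values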
def plot_auc(self, estimator, estimator_name, neg, pos):
try:
classifier_probas = estimator.decision_function(self.X_test)
except AttributeError:
classifier_probas = estimator.predict_proba(self.X_test)[:, 1]
false_positive_r, true_positive_r, thresholds = metrics.roc_curve(self.y_test, classifier_probas)
roc_auc = metrics.auc(false_positive_r, true_positive_r)
label = '{:.1f}% neg:{} pos:{} {}'.format(roc_auc * 100, neg, pos, estimator_name)
plt.plot(false_positive_r, true_positive_r, label=label)
plt.plot([0, 1], [0, 1], 'k--')
plt.xlim([-0.05, 1.0])
plt.ylim([0.0, 1.05])
plt.title('ROC score(s)')
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.legend(loc='lower right', prop={'size': 10})
plt.savefig("ROC.png", dpi=300, bbox_inches='tight')
plt.grid()
def get_fpr_tpr_roc(model, test_data, test_truth, labels):
y_pred = model.predict(test_data, batch_size=32, verbose=0)
# Compute ROC curve and ROC area for each class
fpr = dict()
tpr = dict()
roc_auc = dict()
for k in labels.keys():
cur_idx = labels[k]
fpr[labels[k]], tpr[labels[k]], _ = roc_curve(test_truth[:,cur_idx], y_pred[:,cur_idx])
roc_auc[labels[k]] = auc(fpr[labels[k]], tpr[labels[k]])
# Compute micro-average ROC curve and ROC area
fpr["micro"], tpr["micro"], _ = roc_curve(test_truth.ravel(), y_pred.ravel())
roc_auc["micro"] = auc(fpr["micro"], tpr["micro"])
return fpr, tpr, roc_auc
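# Usage sketch (illustrative): `labels` maps class names to column indices of the
# one-hot truth / prediction matrices, e.g. {'benign': 0, 'malignant': 1}. The
# returned dicts are keyed by those indices plus "micro", so per-class curves can
# be drawn with something like:
#
#     for name, idx in labels.items():
#         plt.plot(fpr[idx], tpr[idx], label='%s (AUC = %.2f)' % (name, roc_auc[idx]))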
def ExtGBDT(train_x, train_y, test_x, test_y):
""" Ext-GBDT """
num_round = 100
param = {'objective': 'binary:logistic', 'booster': 'gbtree', 'eta': 0.03, 'max_depth': 3, 'eval_metric': 'auc',
'silent': 1, 'min_child_weight': 0.1, 'subsample': 0.7, 'colsample_bytree': 0.8, 'nthread': 4,
'max_delta_step': 0}
train_X = xgb.DMatrix(train_x, train_y)
test_X = xgb.DMatrix(test_x)
bst = xgb.train(param, train_X, num_round)
pred = bst.predict(test_X)
predict_y = []
for i in range(len(pred)):
if pred[i] < 0.5:
predict_y.append(0)
else:
predict_y.append(1)
auc = evaluate_auc(pred, test_y)
evaluate(predict_y, test_y)
return auc
def DTEnsemble(train_x, train_y, test_x, test_y):
""" ??? ?? """
total = np.zeros(len(test_y))
sub_num = 10
for i in range(sub_num):
sub_train_x, sub_train_y = sub_sample(train_x, train_y)
pred = sub_DT(sub_train_x, sub_train_y, test_x, test_y)
total += pred
avg_pred = total / sub_num
avg_predict = []
for i in range(len(avg_pred)):
if avg_pred[i] < 0.5:
avg_predict.append(0)
else:
avg_predict.append(1)
auc = evaluate_auc(avg_pred, test_y)
evaluate(avg_predict, test_y)
return auc
def LREnsemble(train_x, train_y, test_x, test_y):
""" ???? ?? """
total = np.zeros(len(test_y))
sub_num = 10
for i in range(sub_num):
sub_train_x, sub_train_y = sub_sample(train_x, train_y)
pred = sub_LR(sub_train_x, sub_train_y, test_x, test_y)
total += pred
avg_pred = total / sub_num
avg_predict = []
for i in range(len(avg_pred)):
if avg_pred[i] < 0.5:
avg_predict.append(0)
else:
avg_predict.append(1)
auc = evaluate_auc(avg_pred, test_y)
evaluate(avg_predict, test_y)
return auc
def RFEnsemble(train_x, train_y, test_x, test_y):
""" ???? ?? """
total = np.zeros(len(test_y))
sub_num = 10
for i in range(sub_num):
sub_train_x, sub_train_y = sub_sample(train_x, train_y)
pred = sub_RF(sub_train_x, sub_train_y, test_x, test_y)
total += pred
avg_pred = total / sub_num
avg_predict = []
for i in range(len(avg_pred)):
if avg_pred[i] < 0.5:
avg_predict.append(0)
else:
avg_predict.append(1)
auc = evaluate_auc(avg_pred, test_y)
evaluate(avg_predict, test_y)
return auc
def ExtGBDTEnsemble(train_x, train_y, test_x, test_y):
""" Ext-GBDT ?? """
total = np.zeros(len(test_y))
sub_num = 10
for i in range(sub_num):
sub_train_x, sub_train_y = sub_sample(train_x, train_y)
        pred = subExtGBDT(sub_train_x, sub_train_y, test_x, test_y)  # fit on the subsample, as in the other ensembles
total += pred
avg_pred = total / sub_num
avg_predict = []
for i in range(len(avg_pred)):
if avg_pred[i] < 0.5:
avg_predict.append(0)
else:
avg_predict.append(1)
auc = evaluate_auc(avg_pred, test_y)
evaluate(avg_predict, test_y)
return auc
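# Note: the 0.5-threshold loops in the ensemble functions above can be replaced by
# an equivalent vectorised expression, e.g.:
#
#     avg_predict = (avg_pred >= 0.5).astype(int)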
def plot_roc(fpr,tpr,figure_name="roc.png"):
import matplotlib.pyplot as plt
from sklearn.metrics import roc_curve, auc
roc_auc = auc(fpr, tpr)
fig = plt.figure()
lw = 2
plt.plot(fpr, tpr, color='darkorange',
lw=lw, label='ROC curve (area = %0.2f)' % roc_auc)
plt.plot([0, 1], [0, 1], color='navy', lw=lw, linestyle='--')
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Receiver operating characteristic')
plt.legend(loc="lower right")
fig.savefig(os.path.join(LOG_DIR,figure_name), dpi=fig.dpi)
def plot_ROC_by_class(y_true, y_pred, classes, ls='-'):
    print(y_true.shape)
    print(y_pred.shape)
best_thresh = {}
for class_name, c in classes.items(): # for each class
# Compute ROC curve
fpr, tpr, thresholds = roc_curve(y_true[:, c], y_pred[:, c])
roc_auc = auc(fpr, tpr)
# Plot ROC curve
plt.plot(fpr, tpr, label='{}, AUC = {:.3f}'.format(class_name, roc_auc), linestyle=ls)
# Calculate J statistic
J = [j_statistic(y_true[:, c], y_pred[:, c], t) for t in thresholds]
j_best = np.argmax(J)
# Store best threshold for each class
        best_thresh[class_name] = thresholds[j_best]
return best_thresh
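# `j_statistic` is called above but not defined in this excerpt. A plausible
# sketch (an assumption, not necessarily the original project's helper) computing
# Youden's J = sensitivity + specificity - 1 at a single score threshold:
import numpy as np

def j_statistic(y_true, y_scores, threshold):
    """Youden's J statistic for binary labels at the given score threshold."""
    y_hat = (np.asarray(y_scores) >= threshold).astype(int)
    y_true = np.asarray(y_true).astype(int)
    tp = np.sum((y_hat == 1) & (y_true == 1))
    fn = np.sum((y_hat == 0) & (y_true == 1))
    tn = np.sum((y_hat == 0) & (y_true == 0))
    fp = np.sum((y_hat == 1) & (y_true == 0))
    sensitivity = tp / float(tp + fn) if (tp + fn) else 0.0
    specificity = tn / float(tn + fp) if (tn + fp) else 0.0
    return sensitivity + specificity - 1.0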
def plot_PR_by_class(y_pred, y_true, classes, out_path):
best_thresh = {}
for class_name, c in classes.items(): # for each class
# Compute ROC curve
precision, recall, thresholds = precision_recall_curve(y_true[:, c], y_pred[:, c])
pr_auc = auc(recall, precision)
# Plot PR curve
plt.plot(recall, precision, label='{}, AUC = {:.3f}'.format(class_name, pr_auc))
# Calculate J statistic
        J = [j_statistic(y_true[:, c], y_pred[:, c], t) for t in thresholds]
        j_best = np.argmax(J)
        # Store best threshold for each class
        best_thresh[class_name] = thresholds[j_best]
return best_thresh
def plot_roc_auc(predictions, ground_truth, name=''):
# Calculate ROC curve
y_pred = np.asarray(predictions).ravel()
y_true = np.asarray(ground_truth).ravel()
fpr, tpr, thresholds = roc_curve(y_true, y_pred)
roc_auc = auc(fpr, tpr)
# Plot
plt.plot(fpr, tpr, label='{}, AUC = {:.3f}'.format(name, roc_auc))
# # Return index of best model by J statistic
# J = [j_statistic(y_true, y_pred, t) for t in thresholds]
#
# return thresholds[np.argmax(J)] # TODO test this out!
def _score_macro_average(self, n_classes):
"""
Compute the macro average scores for the ROCAUC curves.
"""
# Gather all FPRs
all_fpr = np.unique(np.concatenate([self.fpr[i] for i in range(n_classes)]))
avg_tpr = np.zeros_like(all_fpr)
# Compute the averages per class
for i in range(n_classes):
avg_tpr += interp(all_fpr, self.fpr[i], self.tpr[i])
# Finalize the average
avg_tpr /= n_classes
# Store the macro averages
self.fpr[MACRO] = all_fpr
self.tpr[MACRO] = avg_tpr
self.roc_auc[MACRO] = auc(self.fpr[MACRO], self.tpr[MACRO])
##########################################################################
## Quick method for ROCAUC
##########################################################################
def print_details(self):
    print()
    total_pos = self.tp + self.fp
    total_neg = self.tn + self.fn
    print('\tTrue\tFalse\t\tTotal')
    print('Pos', self.tp, '\t', self.fp, ' \t|\t\t', total_pos)
    print('Neg', self.tn, '\t', self.fn, ' \t|\t\t', total_neg)
    print('\t', str(self.tp + self.tn), '\t', str(self.fp + self.fn), '\t\t\t|', str(total_pos + total_neg))
    print('------------------------------------')
    print('Accuracy', self.accuracy())
    print('Precision', self.precision())
    print('Recall', self.recall())
    print('AUC: ', self.auc(), ' ', self.auc(average='micro'))
    print('\nPositives', end=' ')
    print('Average: ', self.positiveAverage(), '\tDeviation: ', self.getDeviation('positive'),
          '\ttotal', len(self.positives))
    print('\nNegatives', end=' ')
    print('Average: ', self.negativeAverage(), '\tDeviation: ', self.getDeviation('negative'),
          '\ttotal', len(self.negatives))
def default_inv_roc_curve(Y_true, var, sample_weight=None):
"""Default ROC curve for a single variable.
Args:
Y_true: array of true classes (n*2).
var: array of variable values.
sample_weight: array of sample weights.
Returns:
Array of (signal efficiency, 1/[background efficiency]) pairs.
"""
fpr, tpr, _ = roc_curve(Y_true[:, 0], var, sample_weight=sample_weight)
print("AUC: {0:.4f}".format(auc(fpr, tpr, reorder=True)))
res = 1./len(Y_true)
return np.array([[tp, 1./max(fp, res)]
for tp,fp in zip(tpr,fpr)
if fp > 0.])
def plot_roc(y_test, y_pred, label=''):
"""Compute ROC curve and ROC area"""
fpr, tpr, _ = roc_curve(y_test, y_pred)
roc_auc = auc(fpr, tpr)
# Plot of a ROC curve for a specific class
plt.figure()
plt.plot(fpr, tpr, label='ROC curve (area = %0.2f)' % roc_auc)
plt.plot([0, 1], [0, 1], 'k--')
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Receiver operating characteristic' + label)
plt.legend(loc="lower right")
plt.show()
def compute_roc(probs_neg, probs_pos, plot=False):
"""
TODO
:param probs_neg:
:param probs_pos:
:param plot:
:return:
"""
probs = np.concatenate((probs_neg, probs_pos))
labels = np.concatenate((np.zeros_like(probs_neg), np.ones_like(probs_pos)))
fpr, tpr, _ = roc_curve(labels, probs)
auc_score = auc(fpr, tpr)
if plot:
plt.figure(figsize=(7, 6))
plt.plot(fpr, tpr, color='blue',
label='ROC (AUC = %0.4f)' % auc_score)
plt.legend(loc='lower right')
plt.title("ROC Curve")
plt.xlabel("FPR")
plt.ylabel("TPR")
plt.show()
return fpr, tpr, auc_score
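# Usage sketch for compute_roc above (illustrative scores, not project data). The
# function body assumes module-level `np`, `plt`, and `roc_curve`/`auc` from
# sklearn.metrics, as imported here.
import numpy as np
from sklearn.metrics import roc_curve, auc

demo_neg = np.array([0.10, 0.20, 0.30, 0.25])   # hypothetical scores for negatives
demo_pos = np.array([0.70, 0.80, 0.60, 0.90])   # hypothetical scores for positives
fpr_demo, tpr_demo, auc_demo = compute_roc(demo_neg, demo_pos, plot=False)
print('demo AUC = %.4f' % auc_demo)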
# Source: MLNPCapstone.py, project machine-learning-nanodegree-program-capstone, author harrylippy
def plot_roc(self):
    for learner, clf in self._clf.items():
# Make the predictions
(X_test, y_test) = self._test_data
y_pred = clf.predict(X_test)
# Get (f)alse (p)ositive (r)ate, (t)rue (p)ositive (r)ate
fpr, tpr, _ = roc_curve(y_test, y_pred)
# Add this classifier's results to the plot
plt.plot(fpr, tpr, label='%s (area = %0.2f)'\
% (learner, auc(fpr, tpr)))
# Now do the plot
# NOTE: plot code stolen from scikit-learn docs (http://bit.ly/236k6M3)
plt.xlim([-0.05, 1.05])
plt.ylim([-0.05, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Receiver Operating Characteristic (ROC)')
plt.legend(loc="lower right")
plt.show()
def score_func_to_gridsearch(estimator, X_test=None, y_test=None):
""" Function to be given as a scorer function to Grid Search Method.
It is going to transform the matrix os predicts generated by 'all' option
to an final accuracy score. Use a high value to CV
"""
if not hasattr(estimator, 'fitnesses_'):
raise ValueError("Fit")
obj1=[]
obj2=[]
for i in range(len(estimator.best_pareto_front_)):
obj1.append(estimator.best_pareto_front_[i].fitness.values[0])
obj2.append(estimator.best_pareto_front_[i].fitness.values[1])
obj1.append(obj1[0])
obj2.append(1)
return auc(obj2, obj1, reorder=True)
def evaluate(self, data, labels, site, sess=None):
"""
Runs one evaluation against the full epoch of data.
Return the precision and the number of correct predictions.
Batch evaluation saves memory and enables this to run on smaller GPUs.
sess: the session in which the model has been trained.
data: size N x M
N: number of signals (samples)
M: number of vertices (features)
labels: size N
N: number of signals (samples)
"""
t_process, t_wall = time.process_time(), time.time()
scores, loss = self.predict(data, labels, site, sess)
fpr, tpr, _ = roc_curve(labels, scores)
roc_auc = auc(fpr, tpr)
string = 'samples: {:d}, AUC : {:.2f}, loss: {:.4e}'.format(len(labels), roc_auc, loss)
if sess is None:
string += '\ntime: {:.0f}s (wall {:.0f}s)'.format(time.process_time() - t_process, time.time() - t_wall)
return string, roc_auc, loss, scores
def clf_scores(clf, x_train, y_train, x_test, y_test):
info = dict()
# TODO: extend this to a confusion matrix per fold for more flexibility downstream (tuning)
# TODO: calculate a set of ROC curves per fold instead of running it on test, currently introducing bias
scores = cross_val_score(clf, x_train, y_train, cv=cv, n_jobs=-1)
runtime = time()
clf.fit(x_train, y_train)
runtime = time() - runtime
y_test_predicted = clf.predict(x_test)
info['runtime'] = runtime
info['accuracy'] = min(scores)
info['accuracy_test'] = accuracy_score(y_test, y_test_predicted)
info['accuracy_folds'] = scores
info['confusion_matrix'] = confusion_matrix(y_test, y_test_predicted)
clf.fit(x_train, y_train)
fpr, tpr, _ = roc_curve(y_test, clf_predict_proba(clf, x_test))
info['fpr'] = fpr
info['tpr'] = tpr
info['auc'] = auc(fpr, tpr)
return info
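# `clf_predict_proba` is used above but not defined in this excerpt. A plausible
# sketch (an assumption, not necessarily the original helper) that returns a
# continuous per-sample score suitable for roc_curve:
def clf_predict_proba(clf, x):
    """Return a continuous score per sample, preferring predict_proba."""
    if hasattr(clf, 'predict_proba'):
        return clf.predict_proba(x)[:, 1]
    return clf.decision_function(x)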