def calc_auc(y_pred_proba, labels, exp_run_folder, classifier, fold):
    auc = roc_auc_score(labels, y_pred_proba)
    fpr, tpr, thresholds = roc_curve(labels, y_pred_proba)
    curve_roc = np.array([fpr, tpr])
    datafile_id = open(exp_run_folder + '/data/roc_{}_{}.txt'.format(classifier, fold), 'w+')
    np.savetxt(datafile_id, curve_roc)
    datafile_id.close()
    plt.plot(fpr, tpr, label='ROC curve: AUC={0:0.2f}'.format(auc))
    plt.xlabel('1-Specificity')
    plt.ylabel('Sensitivity')
    plt.ylim([0.0, 1.05])
    plt.xlim([0.0, 1.0])
    plt.grid(True)
    plt.title('ROC Fold {}'.format(fold))
    plt.legend(loc="lower left")
    plt.savefig(exp_run_folder + '/data/roc_{}_{}.pdf'.format(classifier, fold), format='pdf')
    return auc
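A minimal usage sketch for calc_auc on made-up data; the folder and classifier names are hypothetical, and the numpy/matplotlib/sklearn imports are assumed to be in scope as the function expects.

labels = np.array([0, 0, 1, 1, 0, 1])                           # toy ground-truth labels
y_pred_proba = np.array([0.15, 0.40, 0.80, 0.90, 0.30, 0.65])   # toy predicted probabilities
# 'results/run1' is a hypothetical experiment folder; calc_auc writes into an existing 'data/' subfolder of it
auc_value = calc_auc(y_pred_proba, labels, exp_run_folder='results/run1', classifier='rf', fold=0)
print(auc_value)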
def plot_ROC(test_labels, test_predictions):
    fpr, tpr, thresholds = metrics.roc_curve(
        test_labels, test_predictions, pos_label=1)
    auc = "%.2f" % metrics.auc(fpr, tpr)
    title = 'ROC Curve, AUC = ' + str(auc)
    with plt.style.context(('ggplot')):
        fig, ax = plt.subplots()
        ax.plot(fpr, tpr, "#000099", label='ROC curve')
        ax.plot([0, 1], [0, 1], 'k--', label='Baseline')
        plt.xlim([0.0, 1.0])
        plt.ylim([0.0, 1.05])
        plt.xlabel('False Positive Rate')
        plt.ylabel('True Positive Rate')
        plt.legend(loc='lower right')
        plt.title(title)
    return fig
def getAUC(self, test_tasks):
    mean_tpr = 0.0
    mean_fpr = np.linspace(0, 1, 100)
    for t in range(self.n_tasks):
        X_t, Y_t = self.extractTaskData(self.train_tasks, t)
        X_test_t, Y_test_t = self.extractTaskData(test_tasks, t)
        overallKernel = self.constructKernelFunction(t)
        self.classifiers[t] = SVC(C=self.C, kernel=overallKernel, probability=True, max_iter=self.max_iter_internal, tol=self.tolerance)
        probas_ = self.classifiers[t].fit(X_t, Y_t).predict_proba(X_test_t)
        fpr, tpr, thresholds = roc_curve(Y_test_t, probas_[:, 1])
        mean_tpr += interp(mean_fpr, fpr, tpr)
        mean_tpr[0] = 0.0
    mean_tpr /= self.n_tasks
    mean_tpr[-1] = 1.0
    mean_auc = auc(mean_fpr, mean_tpr)
    return mean_auc, mean_fpr, mean_tpr
def pred_accuracy(y_true, y_pred):
    y_true = sp.copy(y_true)
    if len(sp.unique(y_true)) == 2:
        print 'dichotomous trait, calculating AUC'
        y_min = y_true.min()
        y_max = y_true.max()
        if y_min != 0 or y_max != 1:
            y_true[y_true == y_min] = 0
            y_true[y_true == y_max] = 1
        fpr, tpr, thresholds = metrics.roc_curve(y_true, y_pred)
        auc = metrics.auc(fpr, tpr)
        return auc
    else:
        print 'continuous trait, calculating COR'
        cor = sp.corrcoef(y_true, y_pred)[0, 1]
        return cor
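A small sketch of pred_accuracy on toy data; the snippet is Python 2 style, and scipy is assumed imported as sp with sklearn.metrics as metrics, as the function expects.

y_binary = [1, 1, 2, 2]                    # dichotomous trait coded 1/2; remapped to 0/1 inside
scores_binary = [0.2, 0.4, 0.7, 0.9]
auc_val = pred_accuracy(y_binary, scores_binary)    # takes the AUC branch

y_cont = [1.2, 3.4, 2.2, 5.0]              # continuous trait
pred_cont = [1.0, 3.0, 2.5, 4.8]
cor_val = pred_accuracy(y_cont, pred_cont)          # takes the correlation branch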
def computeFROC(FROCGTList, FROCProbList, totalNumberOfImages, excludeList):
    # Remove excluded candidates
    FROCGTList_local = []
    FROCProbList_local = []
    for i in range(len(excludeList)):
        if excludeList[i] == False:
            FROCGTList_local.append(FROCGTList[i])
            FROCProbList_local.append(FROCProbList[i])
    numberOfDetectedLesions = sum(FROCGTList_local)
    totalNumberOfLesions = sum(FROCGTList)
    totalNumberOfCandidates = len(FROCProbList_local)
    fpr, tpr, thresholds = skl_metrics.roc_curve(FROCGTList_local, FROCProbList_local)
    if sum(FROCGTList) == len(FROCGTList):  # Handle border case when there are no false positives and ROC analysis gives NaN values.
        print "WARNING, this system has no false positives.."
        fps = np.zeros(len(fpr))
    else:
        fps = fpr * (totalNumberOfCandidates - numberOfDetectedLesions) / totalNumberOfImages
    sens = (tpr * numberOfDetectedLesions) / totalNumberOfLesions
    return fps, sens, thresholds
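A toy sketch of how computeFROC might be called; the candidate lists below are invented, and numpy as np plus sklearn.metrics as skl_metrics are assumed imported as above.

FROCGTList = [1, 0, 1, 0, 0, 1]        # 1 = candidate hits a true lesion, 0 = false-positive candidate
FROCProbList = [0.9, 0.2, 0.8, 0.4, 0.1, 0.7]
excludeList = [False, False, False, True, False, False]   # the fourth candidate is excluded
fps, sens, thresholds = computeFROC(FROCGTList, FROCProbList,
                                    totalNumberOfImages=2, excludeList=excludeList)
# fps: false positives per image at each threshold, sens: lesion-level sensitivity at each threshold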
def _update_tsg_metrics(self, y_true, y_pred, prob):
    self.tsg_gene_pred = pd.Series(y_pred, self.y.index)
    self.tsg_gene_score = pd.Series(prob, self.y.index)
    # compute metrics for classification
    self.tsg_gene_count[self.num_pred] = sum(y_pred)
    prec, recall, fscore, support = metrics.precision_recall_fscore_support(y_true, y_pred)
    tsg_col = 1  # column for metrics related to tsg
    self.tsg_precision[self.num_pred] = prec[tsg_col]
    self.tsg_recall[self.num_pred] = recall[tsg_col]
    self.tsg_f1_score[self.num_pred] = fscore[tsg_col]
    self.logger.debug('Tsg Iter %d: Precision=%s, Recall=%s, f1_score=%s' % (
        self.num_pred + 1, str(prec), str(recall), str(fscore)))
    # compute ROC curve metrics
    fpr, tpr, thresholds = metrics.roc_curve(y_true, prob)
    self.tsg_tpr_array[self.num_pred, :] = interp(self.tsg_fpr_array, fpr, tpr)
    #self.tsg_tpr_array[0] = 0.0
    # compute Precision-Recall curve metrics
    p, r, thresh = metrics.precision_recall_curve(y_true, prob)
    p, r, thresh = p[::-1], r[::-1], thresh[::-1]  # reverse order of results
    self.tsg_precision_array[self.num_pred, :] = interp(self.tsg_recall_array, r, p)
def addFold(self, fold_id, true_labels, predicted_proba, predicted_scores):
    if len(true_labels) == 0:
        return
    if self.probabilist_model:
        scores = predicted_proba
    else:
        scores = predicted_scores
    fpr, tpr, thresholds = roc_curve(true_labels, scores)
    self.mean_tpr += interp(self.mean_fpr, fpr, tpr)
    self.thresholds = interp(self.mean_fpr, fpr, thresholds)
    self.mean_tpr[0] = 0.0
    self.thresholds[0] = 1.0
    self.thresholds[-1] = 0.0
    roc_auc = auc(fpr, tpr)
    if self.num_folds > 1:
        self.ax1.plot(fpr, tpr, lw=1,
                      label='ROC fold %d (area = %0.2f)' % (fold_id, roc_auc))
    else:
        self.ax1.plot(fpr, tpr, lw=3,
                      color=colors_tools.getLabelColor('all'),
                      label='ROC (area = %0.2f)' % roc_auc)
def plot_roc_curve(y_true, y_score, ax=None):
    '''
    Plot the Receiver Operating Characteristic curve, including the
    Area Under the Curve (AUC) score.

    Parameters
    ----------
    y_true : array
    y_score : array
    ax : matplotlib.axes, defaults to new axes

    Returns
    -------
    ax : matplotlib.axes
    '''
    ax = ax or plt.axes()
    auc = metrics.roc_auc_score(y_true, y_score)
    fpr, tpr, _ = metrics.roc_curve(y_true, y_score)
    ax.plot(fpr, tpr)
    ax.annotate('AUC: {:.2f}'.format(auc), (.8, .2))
    ax.plot([0, 1], [0, 1], linestyle='--', color='k')
    return ax
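A possible call of plot_roc_curve on toy arrays, assuming matplotlib.pyplot as plt and sklearn.metrics as metrics are imported as the function expects.

import numpy as np
y_true = np.array([0, 0, 1, 1])
y_score = np.array([0.1, 0.4, 0.35, 0.8])
ax = plot_roc_curve(y_true, y_score)   # draws the curve on a new axes and returns it
plt.show()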
def get_auc(outputs, probas):
    ''' AUC is a common metric for binary classification
    methods, comparing true & false positive rates.

    Args
    ----
    outputs : numpy array
        true outcomes (OxTxN)
    probas : numpy array
        predicted probabilities (OxTxN)

    Returns
    -------
    auc : float
    '''
    fpr, tpr, _ = roc_curve(outputs, probas[:, 1])
    return auc(fpr, tpr)
def plot_roc_curve(true_y, prob_y, out_file=None):
    from sklearn.metrics import roc_curve
    fpr, tpr, _ = roc_curve(true_y, prob_y)
    fig = plt.figure()
    plt.plot(fpr, tpr, color='darkorange', lw=2, label='ROC curve')
    plt.plot([0, 1], [0, 1], color='navy', lw=1, linestyle='--')
    plt.xlim([-0.025, 1.025])
    plt.ylim([-0.025, 1.025])
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title('ROC Curve')
    if out_file is not None:
        fig.savefig(out_file)
    return fig
def plot_auc(self, estimator, estimator_name, neg, pos):
    try:
        classifier_probas = estimator.decision_function(self.X_test)
    except AttributeError:
        classifier_probas = estimator.predict_proba(self.X_test)[:, 1]
    false_positive_r, true_positive_r, thresholds = metrics.roc_curve(self.y_test, classifier_probas)
    roc_auc = metrics.auc(false_positive_r, true_positive_r)
    label = '{:.1f}% neg:{} pos:{} {}'.format(roc_auc * 100, neg, pos, estimator_name)
    plt.plot(false_positive_r, true_positive_r, label=label)
    plt.plot([0, 1], [0, 1], 'k--')
    plt.xlim([-0.05, 1.0])
    plt.ylim([0.0, 1.05])
    plt.title('ROC score(s)')
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.legend(loc='lower right', prop={'size': 10})
    plt.savefig("ROC.png", dpi=300, bbox_inches='tight')
    plt.grid()
def get_fpr_tpr_roc(model, test_data, test_truth, labels):
    y_pred = model.predict(test_data, batch_size=32, verbose=0)
    # Compute ROC curve and ROC area for each class
    fpr = dict()
    tpr = dict()
    roc_auc = dict()
    for k in labels.keys():
        cur_idx = labels[k]
        fpr[labels[k]], tpr[labels[k]], _ = roc_curve(test_truth[:, cur_idx], y_pred[:, cur_idx])
        roc_auc[labels[k]] = auc(fpr[labels[k]], tpr[labels[k]])
    # Compute micro-average ROC curve and ROC area
    fpr["micro"], tpr["micro"], _ = roc_curve(test_truth.ravel(), y_pred.ravel())
    roc_auc["micro"] = auc(fpr["micro"], tpr["micro"])
    return fpr, tpr, roc_auc
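A hypothetical call of get_fpr_tpr_roc; DummyModel below is only a stand-in for a trained Keras-style model so the expected shapes are clear, and numpy as np plus roc_curve/auc from sklearn.metrics are assumed imported as above.

class DummyModel(object):
    """Stand-in for a Keras-style model whose predict() returns per-class scores."""
    def predict(self, data, batch_size=32, verbose=0):
        return np.random.rand(data.shape[0], 2)

labels = {'negative': 0, 'positive': 1}                     # class name -> column in the one-hot targets
test_data = np.random.rand(20, 5)
test_truth = np.eye(2)[np.random.randint(0, 2, size=20)]    # one-hot ground truth
fpr, tpr, roc_auc = get_fpr_tpr_roc(DummyModel(), test_data, test_truth, labels)
# roc_auc is keyed by the column indices, plus "micro" for the micro-average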
def test_all_metrics(model, data=None, usage_ratio=1):
    if data is None:
        X_train, y_train, X_test, y_test = read_data(usage_ratio=usage_ratio)
    else:
        # You ought to use the same training & testing set from your initial input.
        X_train, y_train, X_test, y_test = data
    y_pred = model.predict_classes(X_test)
    y_ground = np.argmax(y_test, axis=1)
    # y_proba = model.predict_proba(X_test)
    # overall_acc = (y_pred == y_ground).sum() * 1. / y_pred.shape[0]
    precision = sk.metrics.precision_score(y_ground, y_pred)
    recall = sk.metrics.recall_score(y_ground, y_pred)
    f1_score = sk.metrics.f1_score(y_ground, y_pred)
    # confusion_matrix = sk.metrics.confusion_matrix(y_ground, y_pred)
    # fpr, tpr, thresholds = sk.metrics.roc_curve(y_ground, y_pred)
    print "precision_score = ", precision
    print "recall_score = ", recall
    print "f1_score = ", f1_score
    # plot_roc_curve(y_test, y_proba)
    plot_confusion_matrix(y_ground, y_pred)
def plot_ROC_by_class(y_true, y_pred, classes, ls='-'):
    print y_true.shape
    print y_pred.shape
    best_thresh = {}
    for class_name, c in classes.items():  # for each class
        # Compute ROC curve
        fpr, tpr, thresholds = roc_curve(y_true[:, c], y_pred[:, c])
        roc_auc = auc(fpr, tpr)
        # Plot ROC curve
        plt.plot(fpr, tpr, label='{}, AUC = {:.3f}'.format(class_name, roc_auc), linestyle=ls)
        # Calculate J statistic for every threshold
        J = [j_statistic(y_true[:, c], y_pred[:, c], t) for t in thresholds]
        j_best = np.argmax(J)
        # Store the threshold that maximises J for each class
        best_thresh[class_name] = thresholds[j_best]
    return best_thresh
def plot_roc_auc(predictions, ground_truth, name=''):
    # Calculate ROC curve
    y_pred = np.asarray(predictions).ravel()
    y_true = np.asarray(ground_truth).ravel()
    fpr, tpr, thresholds = roc_curve(y_true, y_pred)
    roc_auc = auc(fpr, tpr)
    # Plot
    plt.plot(fpr, tpr, label='{}, AUC = {:.3f}'.format(name, roc_auc))
    # # Return index of best model by J statistic
    # J = [j_statistic(y_true, y_pred, t) for t in thresholds]
    #
    # return thresholds[np.argmax(J)]  # TODO test this out!
def print_roc(self, y_true, y_scores, filename):
    '''
    Plots the ROC curve for this model and saves it to `filename`.
    '''
    fpr, tpr, thresholds = metrics.roc_curve(y_true, y_scores)
    plt.figure()
    plt.plot(fpr, tpr, color='darkorange', label='ROC curve (area = %0.2f)' % self.roc_auc)
    plt.plot([0, 1], [0, 1], color='navy', linestyle='--')
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title('Receiver operating characteristic')
    plt.legend(loc="lower right")
    plt.savefig(filename)
    plt.close()
def default_inv_roc_curve(Y_true, var, sample_weight=None):
    """Default ROC curve for a single variable.

    Args:
        Y_true: array of true classes (n*2).
        var: array of variable values.
        sample_weight: array of sample weights.

    Returns:
        Array of (signal efficiency, 1/[background efficiency]) pairs.
    """
    fpr, tpr, _ = roc_curve(Y_true[:, 0], var, sample_weight=sample_weight)
    print("AUC: {0:.4f}".format(auc(fpr, tpr, reorder=True)))
    res = 1. / len(Y_true)
    return np.array([[tp, 1. / max(fp, res)]
                     for tp, fp in zip(tpr, fpr)
                     if fp > 0.])
def plot_roc(y_test, y_pred, label=''):
    """Compute ROC curve and ROC area"""
    fpr, tpr, _ = roc_curve(y_test, y_pred)
    roc_auc = auc(fpr, tpr)
    # Plot of a ROC curve for a specific class
    plt.figure()
    plt.plot(fpr, tpr, label='ROC curve (area = %0.2f)' % roc_auc)
    plt.plot([0, 1], [0, 1], 'k--')
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title('Receiver operating characteristic' + label)
    plt.legend(loc="lower right")
    plt.show()
def compute_roc(probs_neg, probs_pos, plot=False):
    """
    Compute the ROC curve and its AUC from separate negative- and positive-class scores.

    :param probs_neg: scores assigned to the negative samples
    :param probs_pos: scores assigned to the positive samples
    :param plot: if True, also plot the ROC curve
    :return: fpr, tpr, auc_score
    """
    probs = np.concatenate((probs_neg, probs_pos))
    labels = np.concatenate((np.zeros_like(probs_neg), np.ones_like(probs_pos)))
    fpr, tpr, _ = roc_curve(labels, probs)
    auc_score = auc(fpr, tpr)
    if plot:
        plt.figure(figsize=(7, 6))
        plt.plot(fpr, tpr, color='blue',
                 label='ROC (AUC = %0.4f)' % auc_score)
        plt.legend(loc='lower right')
        plt.title("ROC Curve")
        plt.xlabel("FPR")
        plt.ylabel("TPR")
        plt.show()
    return fpr, tpr, auc_score
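A short sketch of compute_roc on invented score arrays, assuming numpy as np and roc_curve/auc from sklearn.metrics are imported as above.

probs_neg = np.array([0.05, 0.2, 0.3, 0.1])   # detector scores for negative samples
probs_pos = np.array([0.7, 0.9, 0.6, 0.8])    # detector scores for positive samples
fpr, tpr, auc_score = compute_roc(probs_neg, probs_pos, plot=False)
print('AUC = {:.3f}'.format(auc_score))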
# Source: MLNPCapstone.py, project machine-learning-nanodegree-program-capstone, author harrylippy
def plot_roc(self):
    for learner, clf in self._clf.iteritems():
        # Make the predictions
        (X_test, y_test) = self._test_data
        y_pred = clf.predict(X_test)
        # Get (f)alse (p)ositive (r)ate, (t)rue (p)ositive (r)ate
        fpr, tpr, _ = roc_curve(y_test, y_pred)
        # Add this classifier's results to the plot
        plt.plot(fpr, tpr, label='%s (area = %0.2f)'
                 % (learner, auc(fpr, tpr)))
    # Now do the plot
    # NOTE: plot code stolen from scikit-learn docs (http://bit.ly/236k6M3)
    plt.xlim([-0.05, 1.05])
    plt.ylim([-0.05, 1.05])
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title('Receiver Operating Characteristic (ROC)')
    plt.legend(loc="lower right")
    plt.show()
def print_misclassified(y, pred, files, fom_func, threshold):
    #fpr, tpr, thresholds = roc_curve(y, pred)
    #fom = 0.01
    #FoMs.append(1-tpr[np.where(fpr<=FPR)[0][-1]])
    #FoM, threshold, fpr, tpr = fom_func(y, pred, fom)
    negatives = np.where(y == 0)
    positives = np.where(y == 1)
    falsePositives = files[negatives][np.where(pred[negatives] > threshold)]
    print "[+] False positives (%d):" % len(falsePositives)
    for i, falsePositive in enumerate(falsePositives):
        print "\t " + str(falsePositive), pred[negatives][np.where(pred[negatives] > threshold)][i]
    print
    missedDetections = files[positives][np.where(pred[positives] <= threshold)]
    print "[+] Missed Detections (%d):" % len(missedDetections)
    for i, missedDetection in enumerate(missedDetections):
        print "\t " + str(missedDetection), pred[positives][np.where(pred[positives] <= threshold)][i]
    print
def evaluate(self, data, labels, site, sess=None):
    """
    Runs one evaluation against the full epoch of data.
    Return the precision and the number of correct predictions.
    Batch evaluation saves memory and enables this to run on smaller GPUs.

    sess: the session in which the model has been trained.
    op: the Tensor that returns the number of correct predictions.
    data: size N x M
        N: number of signals (samples)
        M: number of vertices (features)
    labels: size N
        N: number of signals (samples)
    """
    t_process, t_wall = time.process_time(), time.time()
    scores, loss = self.predict(data, labels, site, sess)
    fpr, tpr, _ = roc_curve(labels, scores)
    roc_auc = auc(fpr, tpr)
    string = 'samples: {:d}, AUC : {:.2f}, loss: {:.4e}'.format(len(labels), roc_auc, loss)
    if sess is None:
        string += '\ntime: {:.0f}s (wall {:.0f}s)'.format(time.process_time() - t_process, time.time() - t_wall)
    return string, roc_auc, loss, scores
def clf_scores(clf, x_train, y_train, x_test, y_test):
    info = dict()
    # TODO: extend this to a confusion matrix per fold for more flexibility downstream (tuning)
    # TODO: calculate a set of ROC curves per fold instead of running it on test, currently introducing bias
    scores = cross_val_score(clf, x_train, y_train, cv=cv, n_jobs=-1)
    runtime = time()
    clf.fit(x_train, y_train)
    runtime = time() - runtime
    y_test_predicted = clf.predict(x_test)
    info['runtime'] = runtime
    info['accuracy'] = min(scores)
    info['accuracy_test'] = accuracy_score(y_test, y_test_predicted)
    info['accuracy_folds'] = scores
    info['confusion_matrix'] = confusion_matrix(y_test, y_test_predicted)
    clf.fit(x_train, y_train)
    fpr, tpr, _ = roc_curve(y_test, clf_predict_proba(clf, x_test))
    info['fpr'] = fpr
    info['tpr'] = tpr
    info['auc'] = auc(fpr, tpr)
    return info
def test_roc_returns_consistency():
    # Test whether the returned threshold matches up with tpr
    # make small toy dataset
    y_true, _, probas_pred = make_prediction(binary=True)
    fpr, tpr, thresholds = roc_curve(y_true, probas_pred)
    # use the given thresholds to determine the tpr
    tpr_correct = []
    for t in thresholds:
        tp = np.sum((probas_pred >= t) & y_true)
        p = np.sum(y_true)
        tpr_correct.append(1.0 * tp / p)
    # compare tpr and tpr_correct to see if the thresholds' order was correct
    assert_array_almost_equal(tpr, tpr_correct, decimal=2)
    assert_equal(fpr.shape, tpr.shape)
    assert_equal(fpr.shape, thresholds.shape)
def test_roc_nonrepeating_thresholds():
    # Test to ensure that we don't return spurious repeating thresholds.
    # Duplicated thresholds can arise due to machine precision issues.
    dataset = datasets.load_digits()
    X = dataset['data']
    y = dataset['target']
    # This random forest classifier can only return probabilities
    # significant to two decimal places
    clf = ensemble.RandomForestClassifier(n_estimators=100, random_state=0)
    # How well can the classifier predict whether a digit is less than 5?
    # This task contributes floating point roundoff errors to the probabilities
    train, test = slice(None, None, 2), slice(1, None, 2)
    probas_pred = clf.fit(X[train], y[train]).predict_proba(X[test])
    y_score = probas_pred[:, :5].sum(axis=1)  # roundoff errors begin here
    y_true = [yy < 5 for yy in y[test]]
    # Check for repeating values in the thresholds
    fpr, tpr, thresholds = roc_curve(y_true, y_score, drop_intermediate=False)
    assert_equal(thresholds.size, np.unique(np.round(thresholds, 2)).size)
def test_roc_curve_one_label():
    y_true = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
    y_pred = [0, 1, 0, 1, 0, 1, 0, 1, 0, 1]
    # assert there are warnings
    w = UndefinedMetricWarning
    fpr, tpr, thresholds = assert_warns(w, roc_curve, y_true, y_pred)
    # all true labels, all fpr should be nan
    assert_array_equal(fpr,
                       np.nan * np.ones(len(thresholds)))
    assert_equal(fpr.shape, tpr.shape)
    assert_equal(fpr.shape, thresholds.shape)
    # assert there are warnings
    fpr, tpr, thresholds = assert_warns(w, roc_curve,
                                        [1 - x for x in y_true],
                                        y_pred)
    # all negative labels, all tpr should be nan
    assert_array_equal(tpr,
                       np.nan * np.ones(len(thresholds)))
    assert_equal(fpr.shape, tpr.shape)
    assert_equal(fpr.shape, thresholds.shape)
def plot_ROC(actual, predictions):
    # plot the FPR vs TPR and AUC for a two-class problem (0, 1)
    import matplotlib.pyplot as plt
    from sklearn.metrics import roc_curve, auc
    false_positive_rate, true_positive_rate, thresholds = roc_curve(actual, predictions)
    roc_auc = auc(false_positive_rate, true_positive_rate)
    plt.title('Receiver Operating Characteristic')
    plt.plot(false_positive_rate, true_positive_rate, 'b',
             label='AUC = %0.2f' % roc_auc)
    plt.legend(loc='lower right')
    plt.plot([0, 1], [0, 1], 'r--')
    plt.xlim([-0.1, 1.2])
    plt.ylim([-0.1, 1.2])
    plt.ylabel('True Positive Rate')
    plt.xlabel('False Positive Rate')
    plt.show()
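A toy call of plot_ROC with made-up binary labels and scores; the function imports matplotlib and sklearn.metrics itself.

actual = [0, 0, 1, 1, 0, 1]
predictions = [0.2, 0.3, 0.6, 0.8, 0.4, 0.9]
plot_ROC(actual, predictions)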
def metric(model, test_csv, fname):
    X, Y_true, headers = get_XY(test_csv)
    Y_pred = model.predict(X)
    try:
        print confusion_matrix(Y_true, [a[0] > 0.5 for a in Y_pred])
    except IndexError:
        print confusion_matrix(Y_true, [a > 0.5 for a in Y_pred])
    fpr, tpr, _ = roc_curve(Y_true, Y_pred)
    roc_auc = roc_auc_score(Y_true, Y_pred)
    plt.figure()
    plt.plot(fpr, tpr, label='ROC curve (area = %0.2f)' % roc_auc)
    plt.plot([0, 1], [0, 1], 'k--')
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title('ROC - %s' % fname.split('/')[-1])
    plt.legend(loc="lower right")
    plt.show()
    plt.savefig(fname + ' - roc.png')
    return plt