def calculate_roc(thresholds, embeddings1, embeddings2, actual_issame, num_folds=10):
    """Compute per-threshold TPR/FPR and per-fold accuracy at the best threshold.

    The pairs are split into ``num_folds`` consecutive folds (no shuffling).
    For every fold, the threshold that maximizes accuracy on the training
    part is selected, and then:

    * TPR and FPR are measured on the held-out part for *every* threshold;
    * accuracy is measured on the held-out part for the selected threshold.

    Args:
        thresholds: Sequence of candidate distance thresholds.
        embeddings1: 2-D array, one embedding per row (first item of each pair).
        embeddings2: 2-D array with the same shape as ``embeddings1``
            (second item of each pair).
        actual_issame: Per-pair ground-truth labels. Any array-like is
            accepted; it is converted to a NumPy array so that it can be
            indexed with the fold index arrays.
        num_folds: Number of cross-validation folds.

    Returns:
        A tuple ``(tpr, fpr, acc)`` where ``tpr`` and ``fpr`` have one entry
        per threshold (averaged over folds) and ``acc`` has one entry per
        fold.

    Raises:
        AssertionError: If the two embedding arrays differ in shape.
    """
    assert (embeddings1.shape[0] == embeddings2.shape[0])
    assert (embeddings1.shape[1] == embeddings2.shape[1])

    # A plain list cannot be indexed with an integer index array, so coerce
    # the labels once up front (no-op for ndarray input).
    actual_issame = np.asarray(actual_issame)

    num_pairs = min(len(actual_issame), embeddings1.shape[0])
    num_threshold = len(thresholds)
    k_fold = KFold(n_splits=num_folds, shuffle=False)

    tprs = np.zeros((num_folds, num_threshold))
    fprs = np.zeros((num_folds, num_threshold))
    acc = np.zeros((num_folds))

    # Squared Euclidean distance between the two embeddings of each pair.
    diff = np.subtract(embeddings1, embeddings2)
    dist = np.sum(np.square(diff), 1)
    indices = np.arange(num_pairs)

    for fold_id, (train_set, test_set) in enumerate(k_fold.split(indices)):
        # Slice once per fold instead of once per threshold.
        train_dist, train_issame = dist[train_set], actual_issame[train_set]
        test_dist, test_issame = dist[test_set], actual_issame[test_set]

        # Find the best threshold on the training part of the fold.
        acc_train = np.zeros((num_threshold))
        for thres_id, thres in enumerate(thresholds):
            _, _, acc_train[thres_id] = calculate_acc(thres, train_dist, train_issame)
        best_id = np.argmax(acc_train)

        # Calculate tprs and fprs on the test part for every threshold.
        for thres_id, thres in enumerate(thresholds):
            tprs[fold_id, thres_id], fprs[fold_id, thres_id], _ = calculate_acc(
                thres, test_dist, test_issame)

        # Use the best threshold to calculate accuracy on the test part.
        _, _, acc[fold_id] = calculate_acc(thresholds[best_id], test_dist, test_issame)

    tpr = np.mean(tprs, 0)  # true positive rate under different threshold
    fpr = np.mean(fprs, 0)  # false positive rate under different threshold
    return tpr, fpr, acc
评论列表
文章目录