import logging

import numpy as np
from sklearn.metrics import average_precision_score, roc_auc_score


def report_metrics(y_dset, y_pred, batch_size, dset='Val'):
    # Print additional metrics involving predictions
    n_rows = (y_dset.shape[0] // batch_size) * batch_size
    y_true = y_dset[0:n_rows, :].flatten()
    y_pred = y_pred.flatten()
    val_ap = average_precision_score(y_true, y_pred)
    val_roc = roc_auc_score(y_true, y_pred)
    n = y_true.size
    n_pos = y_true.sum()
    idx_sorted = np.argsort(-y_pred)
    val_rec = []
    logging.info(dset + "-AP {:.6f}".format(val_ap))
    logging.info(dset + "-ROC {:.6f}".format(val_roc))
    for i, v in enumerate([10, 25, 50, 75, 100]):
        tp = y_true[idx_sorted[:int(v * n / 100)]].sum()
        val_rec.append(tp * 1.0 / n_pos)
        logging.info(dset + "-R{} {:.6f}".format(v, val_rec[i]))
    return val_ap, val_rec[2]
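# Minimal usage sketch with synthetic data; the shapes and batch size below are
# illustrative assumptions, not values from the original project.
logging.basicConfig(level=logging.INFO)
rng = np.random.RandomState(0)
y_val = rng.randint(0, 2, size=(100, 5))   # binary ground-truth matrix
val_scores = rng.rand(96, 5)               # scores for the 96 = (100 // 32) * 32 rows kept after batching
report_metrics(y_val, val_scores, batch_size=32, dset='Val')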
# ############################## Main program #################################
Python average_precision_score() usage examples (source code)
def analyzeResult_temp(data, model, DataVecs):
    predict = model.predict(DataVecs)
    data['predict'] = predict
    print("Accuracy: %f %%" % (100. * sum(data["label"] == data["predict"]) / len(data["label"])))
    answer1 = data[data["label"] == 1]
    answer2 = data[data["label"] == 0]
    print("Positive Accuracy: %f %%" % (100. * sum(answer1["label"] == answer1["predict"]) / len(answer1["label"])))
    print("Negative Accuracy: %f %%" % (100. * sum(answer2["label"] == answer2["predict"]) / len(answer2["label"])))
    try:
        result_auc = model.predict_proba(DataVecs)
        print("Roc:%f\nAUPR:%f\n" % (roc_auc_score(data["label"], result_auc[:, 1]),
                                     average_precision_score(data["label"], result_auc[:, 1])))
        print("Precision:%f\nRecall:%f\nF1score:%f\nMCC:%f\n" % (precision_score(data["label"], data["predict"]),
                                                                 recall_score(data["label"], data["predict"]),
                                                                 f1_score(data["label"], data["predict"]),
                                                                 matthews_corrcoef(data["label"], data["predict"])))
    except Exception:
        print("ROC unavailable")
# Performance evaluation and result analysis using adjusted thresholds
def analyzeResult(data, model, DataVecs, threshold):
    predict = model.predict_proba(DataVecs)[:, 1]
    # Store thresholded predictions as 0/1 integers (the original reassigned
    # True/False, which Python 3 does not allow).
    data['predict'] = (predict > threshold).astype(int)
    print("Accuracy: %f %%" % (100. * sum(data["label"] == data["predict"]) / len(data["label"])))
    answer1 = data[data["label"] == 1]
    answer2 = data[data["label"] == 0]
    print("Positive Accuracy: %f %%" % (100. * sum(answer1["label"] == answer1["predict"]) / len(answer1["label"])))
    print("Negative Accuracy: %f %%" % (100. * sum(answer2["label"] == answer2["predict"]) / len(answer2["label"])))
    try:
        result_auc = model.predict_proba(DataVecs)
        print("Roc:%f\nAUPR:%f\n" % (roc_auc_score(data["label"], result_auc[:, 1]),
                                     average_precision_score(data["label"], result_auc[:, 1])))
        print("Precision:%f\nRecall:%f\nF1score:%f\nMCC:%f\n" % (precision_score(data["label"], data["predict"]),
                                                                 recall_score(data["label"], data["predict"]),
                                                                 f1_score(data["label"], data["predict"]),
                                                                 matthews_corrcoef(data["label"], data["predict"])))
    except Exception:
        print("ROC unavailable")
# Performance evaluation
def get_scores(clf, X_t_train, y_train, X_t_test, y_test):
    clf.fit(X_t_train, y_train)
    app = dict()
    score = fbeta_score(y_test, clf.predict(X_t_test), beta=2, average=None)
    avg_sample_score = fbeta_score(y_test, clf.predict(X_t_test), beta=2, average='samples')
    prec_score = precision_score(y_test, clf.predict(X_t_test), average='micro')
    rec_score = recall_score(y_test, clf.predict(X_t_test), average='micro')
    avg_prec = average_precision_score(y_test, clf.predict(X_t_test))
    metrics = [score, avg_sample_score, roc_auc_score(y_test, clf.predict_proba(X_t_test))]
    #app['Classwise Scores'] = ([(mlb.classes_[l], score[l]) for l in score.argsort()[::-1]])
    app['F2 Score'] = avg_sample_score
    app['ROC_AUC'] = roc_auc_score(y_test, clf.predict_proba(X_t_test))
    app['P_AUPR'] = avg_prec
    app['Precision'] = prec_score
    app['Recall'] = rec_score
    return app
def _average_precision(y_true, y_score):
    """Alternative implementation to check for correctness of
    `average_precision_score`."""
    pos_label = np.unique(y_true)[1]
    n_pos = np.sum(y_true == pos_label)
    order = np.argsort(y_score)[::-1]
    y_score = y_score[order]
    y_true = y_true[order]
    score = 0
    for i in range(len(y_score)):
        if y_true[i] == pos_label:
            # Compute precision up to document i
            # i.e, percentage of relevant documents up to document i.
            prec = 0
            for j in range(0, i + 1):
                if y_true[j] == pos_label:
                    prec += 1.0
            prec /= (i + 1.0)
            score += prec
    return score / n_pos
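# Quick consistency check of the reference implementation against sklearn on
# synthetic scores (the data below is illustrative, not taken from the test
# suite); with distinct scores the two values should agree up to tie handling.
rng = np.random.RandomState(0)
y_true_demo = rng.randint(0, 2, size=200)
y_score_demo = rng.rand(200)
print(_average_precision(y_true_demo, y_score_demo),
      average_precision_score(y_true_demo, y_score_demo))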
def _test_precision_recall_curve(y_true, probas_pred):
    # Test Precision-Recall and area under PR curve
    p, r, thresholds = precision_recall_curve(y_true, probas_pred)
    precision_recall_auc = auc(r, p)
    assert_array_almost_equal(precision_recall_auc, 0.85, 2)
    assert_array_almost_equal(precision_recall_auc,
                              average_precision_score(y_true, probas_pred))
    assert_almost_equal(_average_precision(y_true, probas_pred),
                        precision_recall_auc, 1)
    assert_equal(p.size, r.size)
    assert_equal(p.size, thresholds.size + 1)
    # Smoke test in the case of proba having only one value
    p, r, thresholds = precision_recall_curve(y_true,
                                              np.zeros_like(probas_pred))
    precision_recall_auc = auc(r, p)
    assert_array_almost_equal(precision_recall_auc, 0.75, 3)
    assert_equal(p.size, r.size)
    assert_equal(p.size, thresholds.size + 1)
def multilabel_precision_recall(y_score, y_test, clf_target_ids, clf_target_names):
    from sklearn.metrics import precision_recall_curve
    from sklearn.metrics import average_precision_score
    from sklearn.preprocessing import label_binarize
    # Compute Precision-Recall and plot curve
    precision = dict()
    recall = dict()
    average_precision = dict()
    # Find indices that have non-zero detections
    clf_target_map = {k: v for k, v in zip(clf_target_ids, clf_target_names)}
    id2ind = {tid: idx for (idx, tid) in enumerate(clf_target_ids)}
    # Only handle the targets encountered
    unique = np.unique(y_test)
    nzinds = np.int64([id2ind[target] for target in unique])
    # Binarize and create precision-recall curves
    y_test_multi = label_binarize(y_test, classes=unique)
    for i, target in enumerate(unique):
        index = id2ind[target]
        name = clf_target_map[target]
        precision[name], recall[name], _ = precision_recall_curve(y_test_multi[:, i],
                                                                  y_score[:, index])
        average_precision[name] = average_precision_score(y_test_multi[:, i], y_score[:, index])
    # Compute micro-average ROC curve and ROC area
    precision["average"], recall["average"], _ = precision_recall_curve(y_test_multi.ravel(),
                                                                        y_score[:, nzinds].ravel())
    average_precision["micro"] = average_precision_score(y_test_multi, y_score[:, nzinds],
                                                         average="micro")
    average_precision["macro"] = average_precision_score(y_test_multi, y_score[:, nzinds],
                                                         average="macro")
    return precision, recall, average_precision
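# Illustrative call with made-up class ids/names and random scores; the real
# callers pass detector outputs here, so treat this purely as a shape example.
rng = np.random.RandomState(0)
target_ids = [3, 7, 11]
target_names = ['car', 'person', 'bike']
y_scores_demo = rng.rand(50, 3)                 # one score column per target id
y_test_demo = np.repeat(target_ids, 17)[:50]    # ground-truth ids covering every class
prec, rec, ap = multilabel_precision_recall(y_scores_demo, y_test_demo,
                                            target_ids, target_names)
print(sorted(ap.keys()))   # per-class names plus 'micro' and 'macro'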
def plot_precision_recall(indir, gts_file, outdir):
    groundtruths = read_item_tag(gts_file)
    plt.figure(1)
    indir = utils.abs_path_dir(indir)
    for item in os.listdir(indir):
        if ".csv" in item:
            isrcs = read_preds(indir + "/" + item)
            test_groundtruths = []
            predictions = []
            for isrc in isrcs:
                if isrc in groundtruths:
                    test_groundtruths.append(groundtruths[isrc])
                    predictions.append(isrcs[isrc])
            test_groundtruths = [tag == "s" for tag in test_groundtruths]
            precision, recall, _ = precision_recall_curve(test_groundtruths, predictions)
            plt.plot(recall, precision, label=item[:-4] + " (" + str(round(average_precision_score(test_groundtruths, predictions), 3)) + ")")
    plt.xlabel('Recall')
    plt.ylabel('Precision')
    plt.ylim([0.0, 1.05])
    plt.xlim([-0.05, 1.05])
    plt.title('Precision-Recall curve for Algo (AUC)')
    plt.legend(loc='best')
    plt.savefig(outdir + "precision_recall.png", dpi=200, bbox_inches="tight")
    # plt.show()
    plt.close()
    utils.print_success("Precision-Recall curve created in " + outdir)
def plot_pr(gold, predicted_prob, lb):
    pp1 = predicted_prob[:, 1]  # prob for class 1
    p, r, th = precision_recall_curve(gold, pp1)
    ap = average_precision_score(gold, pp1)
    plt.plot(r, p, label=lb + ' (area = {0:0.2f})'.format(ap))
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.xlabel('Recall')
    plt.ylabel('Precision')
    plt.title('Precision and Recall')
    plt.legend(loc="upper right")
    #plt.show()
def eval_clf(gold, clf, mat, start=0):
    pp = clf.predict_proba(mat[start:, :])
    pp1 = pp[:, 1]
    ap = average_precision_score(gold[start:], pp1)
    return ap
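# Hypothetical usage with a scikit-learn classifier on synthetic data, only to
# show the expected argument shapes (all names below are invented).
from sklearn.linear_model import LogisticRegression
from sklearn.datasets import make_classification

X_demo, y_demo = make_classification(n_samples=200, random_state=0)
clf_demo = LogisticRegression().fit(X_demo[:150], y_demo[:150])
print(eval_clf(y_demo, clf_demo, X_demo, start=150))   # AP on the held-out tail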
def video_mean_ap(score_dict, video_list):
    avail_video_labels = [set([i.num_label for i in v.instances]) for v in video_list if
                          v.id in score_dict]
    pred_array = np.array([score_dict[v.id] for v in video_list if v.id in score_dict])
    gt_array = np.zeros(pred_array.shape)
    for i in range(pred_array.shape[0]):
        gt_array[i, list(avail_video_labels[i])] = 1
    mean_ap = average_precision_score(gt_array, pred_array, average='macro')
    return mean_ap
def get_roc_score(edges_pos, edges_neg, emb=None):
    if emb is None:
        feed_dict.update({placeholders['dropout']: 0})
        emb = sess.run(model.z_mean, feed_dict=feed_dict)

    def sigmoid(x):
        return 1 / (1 + np.exp(-x))

    # Predict on test set of edges
    adj_rec = np.dot(emb, emb.T)
    preds = []
    pos = []
    for e in edges_pos:
        preds.append(sigmoid(adj_rec[e[0], e[1]]))
        pos.append(adj_orig[e[0], e[1]])
    preds_neg = []
    neg = []
    for e in edges_neg:
        preds_neg.append(sigmoid(adj_rec[e[0], e[1]]))
        neg.append(adj_orig[e[0], e[1]])
    preds_all = np.hstack([preds, preds_neg])
    # Label the negative half by its own length so the labels stay aligned even
    # when the positive and negative edge sets differ in size.
    labels_all = np.hstack([np.ones(len(preds)), np.zeros(len(preds_neg))])
    roc_score = roc_auc_score(labels_all, preds_all)
    ap_score = average_precision_score(labels_all, preds_all)
    return roc_score, ap_score
def leave_one_out_report(combined_results):
    """ Evaluate leave-one-out CV results from different methods.
    Arguments:
        combined_results: list of tuples of the form
            (method_name, true_y_vector, predicted_probabilities_vector)
        Note the vectors really do need to be numpy arrays.
    Returns: formatted report as string
    """
    ###
    # Unfortunate code duplication with tabulate_metrics here,
    # to be resolved later
    probability_metrics = [
        ('AUC', roc_auc_score),
        ('AP', metrics.average_precision_score)
    ]
    binary_metrics = [
        ('F1', metrics.f1_score),
        ('MCC', metrics.matthews_corrcoef),
        ('precision', metrics.precision_score),
        ('recall', metrics.recall_score)
    ]
    metric_results = {label: [] for label, _ in
                      probability_metrics + binary_metrics}
    metric_results.update({'tn': [], 'fp': [], 'fn': [], 'tp': []})
    for label, metric in probability_metrics:
        for fold, y_true, y_pred in combined_results:
            metric_results[label].append(metric(y_true, y_pred))
    for method, y_true, probabilities in combined_results:
        y_pred = probabilities > 0.5
        for label, metric in binary_metrics:
            metric_results[label].append(metric(y_true, y_pred))
        conf = zip(
            ('tn', 'fp', 'fn', 'tp'),
            metrics.confusion_matrix(y_true, y_pred).flat
        )
        for label, n in conf:
            metric_results[label].append(n)
    index = [t[0] for t in combined_results]
    table = pd.DataFrame(data=metric_results,
                         index=index)
    report = table.to_string(float_format=lambda x: '%.3g' % x)
    return report
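# Hedged usage sketch: the tuples below use random predictions purely to show
# the expected (name, y_true, probabilities) structure of combined_results;
# `metrics`, `roc_auc_score` and `pd` are assumed from the surrounding module.
rng = np.random.RandomState(0)
y_demo = rng.randint(0, 2, size=30)
demo_results = [('method_a', y_demo, rng.rand(30)),
                ('method_b', y_demo, rng.rand(30))]
print(leave_one_out_report(demo_results))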
def avg_precision(predictions_proba, _, labels, parameters):
    return metrics.average_precision_score(labels, predictions_proba)
def mean_ap(distmat, query_ids=None, gallery_ids=None,
            query_cams=None, gallery_cams=None):
    distmat = to_numpy(distmat)
    m, n = distmat.shape
    # Fill up default values
    if query_ids is None:
        query_ids = np.arange(m)
    if gallery_ids is None:
        gallery_ids = np.arange(n)
    if query_cams is None:
        query_cams = np.zeros(m).astype(np.int32)
    if gallery_cams is None:
        gallery_cams = np.ones(n).astype(np.int32)
    # Ensure numpy array
    query_ids = np.asarray(query_ids)
    gallery_ids = np.asarray(gallery_ids)
    query_cams = np.asarray(query_cams)
    gallery_cams = np.asarray(gallery_cams)
    # Sort and find correct matches
    indices = np.argsort(distmat, axis=1)
    matches = (gallery_ids[indices] == query_ids[:, np.newaxis])
    # Compute AP for each query
    aps = []
    for i in range(m):
        # Filter out the same id and same camera
        valid = ((gallery_ids[indices[i]] != query_ids[i]) |
                 (gallery_cams[indices[i]] != query_cams[i]))
        y_true = matches[i, valid]
        y_score = -distmat[i][indices[i]][valid]
        if not np.any(y_true):
            continue
        aps.append(average_precision_score(y_true, y_score))
    if len(aps) == 0:
        raise RuntimeError("No valid query")
    return np.mean(aps)
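# Sketch of a call with a random distance matrix; `to_numpy` is assumed (as in
# the source project) to pass plain ndarrays through unchanged, so a numpy
# distance matrix works here. The IDs and camera labels below are invented.
rng = np.random.RandomState(0)
dist_demo = rng.rand(4, 10)                   # 4 queries x 10 gallery items
q_ids = np.array([0, 1, 2, 3])
g_ids = np.array([0, 0, 1, 1, 2, 2, 3, 3, 4, 4])
print(mean_ap(dist_demo, query_ids=q_ids, gallery_ids=g_ids,
              query_cams=np.zeros(4, dtype=np.int32),
              gallery_cams=np.ones(10, dtype=np.int32)))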
def on_epoch_end(self, epoch, logs={}):
    X_validation = self.model.validation_data[0]
    y_validation = self.model.validation_data[1]
    y_result = self.model.predict(X_validation)
    map = average_precision_score(y_validation.data[y_validation.start: y_validation.end], y_result, average='micro')
    logs['val_map'] = map
    print("val_MAP: {}\n".format(map))
def generate_prec_recall_points(clf, test_examples, test_labels, pk_file):
    # Generate precision-recall points and store them in a pickle file.
    precision = dict()
    recall = dict()
    average_precision = dict()
    thresholds = dict()
    n_classes = len(clf.model.classes_)
    y_test = label_binarize(test_labels, classes=clf.model.classes_)
    y_score = clf.predict_raw_prob(test_examples)
    # It only outputs one column of positive probability.
    y_score = y_score[:, 1:]
    for i in range(n_classes - 1):
        precision[i], recall[i], thresholds[i] = precision_recall_curve(
            y_test[:, i],
            y_score[:, i])
        average_precision[i] = average_precision_score(y_test[:, i],
                                                       y_score[:, i])
    # Compute micro-average ROC curve and ROC area
    precision["micro"], recall["micro"], thresholds['micro'] = \
        precision_recall_curve(y_test.ravel(), y_score.ravel())
    average_precision["micro"] = average_precision_score(y_test, y_score,
                                                         average="micro")
    if pk_file is not None:
        with open(pk_file, 'wb') as f:
            pickle.dump((precision, recall, average_precision, thresholds), f)
def run_auc(job, context, name, compare_id):
    """
    AUC of roc plot.
    ROC plot is defined with mismapped reads being negatives, correctly-mapped
    reads being positives, and AUC expressing how good of a classifier of
    correctly-mapped-ness the MAPQ score is. It says nothing about how well the
    reads are actually mapped.
    """
    if not have_sklearn:
        return ["sklearn_not_installed"] * 2
    work_dir = job.fileStore.getLocalTempDir()
    compare_file = os.path.join(work_dir, '{}.compare.positions'.format(name))
    job.fileStore.readGlobalFile(compare_id, compare_file)
    try:
        data = np.loadtxt(compare_file, dtype=int, delimiter=', ', usecols=(1, 2)).T
        auc = roc_auc_score(data[0], data[1])
        aupr = average_precision_score(data[0], data[1])
    except Exception:
        # will happen if file is empty
        auc, aupr = 0, 0
    return auc, aupr
def save_prcurve(prob, answer, model_name, save_fn, use_neg=True):
    """
    Save the precision-recall curve.
    """
    if not use_neg:
        prob_dn = []
        ans_dn = []
        for p in prob:
            prob_dn.append(p[1:])
        for ans in answer:
            ans_dn.append(ans[1:])
        prob = np.reshape(np.array(prob_dn), (-1))
        ans = np.reshape(np.array(ans_dn), (-1))
    else:
        prob = np.reshape(prob, (-1))
        ans = np.reshape(answer, (-1))
    precision, recall, threshold = precision_recall_curve(ans, prob)
    average_precision = average_precision_score(ans, prob)
    plt.clf()
    plt.plot(recall[:], precision[:], lw=2, color='navy', label=model_name)
    plt.xlabel('Recall')
    plt.ylabel('Precision')
    # plt.ylim([0.3, 1.0])
    # plt.xlim([0.0, 0.4])
    plt.title('Precision-Recall Area={0:0.2f}'.format(average_precision))
    plt.legend(loc="upper right")
    plt.grid(True)
    plt.savefig(save_fn)
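# Hypothetical call with random per-class probabilities and matching one-hot
# answers, only to illustrate the expected shapes; the output file name is made up.
rng = np.random.RandomState(0)
prob_demo = rng.rand(100, 5)                                 # scores for 5 classes (column 0 = NA/negative)
answer_demo = np.eye(5, dtype=int)[rng.randint(0, 5, 100)]   # matching one-hot labels
save_prcurve(prob_demo, answer_demo, model_name='demo_model',
             save_fn='demo_prcurve.png', use_neg=False)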
def score_func(estimator, X, Y):
    global accuracy, precision, recall, f1, mcc, auc, aupr, resultpredict, resultproba, resultlabel
    predict_proba = estimator.predict_proba(X)[:, 1]
    # Threshold at 0.5 and keep predictions as 0/1 integers (the original
    # reassigned True/False, which Python 3 does not allow).
    predict = (predict_proba > 0.50).astype(int)
    resultlabel = np.hstack((resultlabel, Y))
    resultpredict = np.hstack((resultpredict, predict))
    resultproba = np.hstack((resultproba, predict_proba))
    precision += precision_score(Y, predict)
    recall += recall_score(Y, predict)
    f1 += f1_score(Y, predict)
    accuracy += accuracy_score(Y, predict)
    mcc += matthews_corrcoef(Y, predict)
    auc += roc_auc_score(Y, predict_proba)
    aupr += average_precision_score(Y, predict_proba)
    print("finish one")
    return matthews_corrcoef(Y, predict)
# Performance evaluation
def main():
    """
    Calculate the Average Precision (AP) at k.
    """
    # Get the arguments
    args = docopt("""Calculate the Average Precision (AP) at k.
    Usage:
        ap.py <test_results_file> <k>
        <test_results_file> = the test set result file
        <k> = the cutoff; if it is equal to zero, all the rank is considered.
    """)
    test_results_file = args['<test_results_file>']
    cutoff = int(args['<k>'])
    # Sort the lines in the file in descending order according to the score
    dataset = load_dataset(test_results_file)
    dataset = sorted(dataset, key=lambda line: line[-1], reverse=True)
    gold = np.array([1 if label == 'True' else 0 for (x, y, label, score) in dataset])
    scores = np.array([score for (x, y, label, score) in dataset])
    for i in range(1, min(cutoff + 1, len(dataset))):
        try:
            score = average_precision_score(gold[:i], scores[:i])
        except Exception:
            score = 0
        print('Average Precision at %d is %.3f' % (i, 0 if score == -1 else score))
    print('FINAL: Average Precision at %d is %.3f' % (len(dataset), average_precision_score(gold, scores)))
def compute_pr(y_test, probability_predictions):
    """
    Compute Precision-Recall, thresholds and PR AUC.
    Args:
        y_test (list) : true label values corresponding to the predictions. Also length n.
        probability_predictions (list) : predictions coming from an ML algorithm of length n.
    Returns:
        dict:
    """
    _validate_predictions_and_labels_are_equal_length(probability_predictions, y_test)
    # Calculate PR
    precisions, recalls, pr_thresholds = skmetrics.precision_recall_curve(y_test, probability_predictions)
    pr_auc = skmetrics.average_precision_score(y_test, probability_predictions)
    # get ideal cutoffs for suggestions (upper right or 1,1)
    pr_distances = (precisions - 1) ** 2 + (recalls - 1) ** 2
    # To prevent the case where there are two points with the same minimum distance, return only the first
    # np.where returns a tuple (we want the first element in the first array)
    pr_index = np.where(pr_distances == np.min(pr_distances))[0][0]
    best_precision = precisions[pr_index]
    best_recall = recalls[pr_index]
    ideal_pr_cutoff = pr_thresholds[pr_index]
    return {'pr_auc': pr_auc,
            'best_pr_cutoff': ideal_pr_cutoff,
            'best_precision': best_precision,
            'best_recall': best_recall,
            'precisions': precisions,
            'recalls': recalls,
            'pr_thresholds': pr_thresholds}
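# Hedged usage sketch with random labels and scores; `skmetrics` and the length
# check helper come from the surrounding module, so only the inputs are invented.
rng = np.random.RandomState(0)
pr_summary = compute_pr(rng.randint(0, 2, size=100).tolist(), rng.rand(100).tolist())
print(pr_summary['pr_auc'], pr_summary['best_pr_cutoff'])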
def evalData(z, test_set_y):
    """z - prediction; test_set_y is the ground truth."""
    diff = z - test_set_y
    fpr, tpr, thresholds = metrics.roc_curve(test_set_y.ravel(), z.ravel(), pos_label=1)
    auc = metrics.auc(fpr, tpr)
    ap = metrics.average_precision_score(test_set_y.ravel(), z.ravel())
    Q = test_set_y.shape[0]
    Pk10 = 0
    Pk20 = 0
    Pk30 = 0
    Pk50 = 0
    Pk37 = 0
    for i in range(Q):
        Pk10 += ranking_precision_score(test_set_y[i], z[i], k=10)
        Pk20 += ranking_precision_score(test_set_y[i], z[i], k=20)
        Pk30 += ranking_precision_score(test_set_y[i], z[i], k=30)
        Pk37 += ranking_precision_score(test_set_y[i], z[i], k=37)
        Pk50 += ranking_precision_score(test_set_y[i], z[i], k=50)
    Pk10 = Pk10 / Q
    Pk20 = Pk20 / Q
    Pk30 = Pk30 / Q
    Pk50 = Pk50 / Q
    Pk37 = Pk37 / Q
    cross = metrics.log_loss(test_set_y, z)
    print('\n')
    print('AUC', auc, 'MSE', np.mean((diff) ** 2), 'Cross-entropy:', cross)
    print('Precision at k=10: ', Pk10, ' k=20: ', Pk20, ' k=30: ', Pk30, ' k=50: ', Pk50, ' k=37: ', Pk37)
    return Pk37
def compute_ap(class_score_matrix, labels):
    num_classes = class_score_matrix.shape[1]
    one_hot_labels = dense_to_one_hot(labels, num_classes)
    predictions = np.array(class_score_matrix > 0, dtype="int32")
    average_precision = []
    for i in range(num_classes):
        ps = average_precision_score(one_hot_labels[:, i], class_score_matrix[:, i])
        # if not np.isnan(ps):
        average_precision.append(ps)
    return np.array(average_precision)
def evaluate(classes, y_gt, y_pred, threshold_value=0.5):
    """
    Arguments:
        y_gt (num_bag x L): ground truth
        y_pred (num_bag x L): prediction
    """
    print("thresh = {:.6f}".format(threshold_value))
    y_pred_bin = y_pred >= threshold_value
    score_f1_macro = f1_score(y_gt, y_pred_bin, average="macro")
    print("Macro f1_score = {:.6f}".format(score_f1_macro))
    score_f1_micro = f1_score(y_gt, y_pred_bin, average="micro")
    print("Micro f1_score = {:.6f}".format(score_f1_micro))
    # hamming loss
    h_loss = hamming_loss(y_gt, y_pred_bin)
    print("Hamming Loss = {:.6f}".format(h_loss))
    mAP = average_precision_score(y_gt, y_pred)
    print("mAP = {:.2f}%".format(mAP * 100))
    # ap_classes = []
    # for i, cls in enumerate(classes):
    #     ap_cls = average_precision_score(y_gt[:, i], y_pred[:, i])
    #     ap_classes.append(ap_cls)
    #     print("AP({}) = {:.2f}%".format(cls, ap_cls * 100))
    # print("mAP = {:.2f}%".format(np.mean(ap_classes) * 100))
def computeAveragePrecisionMetrics(truthValues, testValues):
    """
    Compute average precision.
    """
    metrics = [
        {
            'name': 'average_precision',
            'value': average_precision_score(
                y_true=truthValues, y_score=testValues)
        }
    ]
    return metrics
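# Tiny usage example with hand-written values (purely illustrative); here every
# positive outranks every negative, so the reported average precision is 1.0.
print(computeAveragePrecisionMetrics([0, 1, 1, 0, 1], [0.1, 0.9, 0.8, 0.3, 0.4]))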
def compute_metrics(sess, logits_op, placeholders, data_file, exporter=None):
    """Compute metrics MAP and MRR over a dataset.
    :param sess: TensorFlow session
    :param logits_op: an operation that returns the scores for a given set of
        sentences
    :param placeholders: placeholders defined for `logits_op`
    :param data_file: a HDF5 file object holding the dataset
    :returns: the values of MAP and MRR as a tuple: (MAP, MRR)
    """
    questions_ph, sentences_ph, keep_prob_ph = placeholders
    if exporter is None:
        exporter = dataio.no_op()
    next(exporter)  # priming the coroutine
    total_avep = 0.0
    total_mrr = 0.0
    n_questions = 0
    for batch in dataio.question_batches(data_file):
        feed_dict = {
            questions_ph: batch.questions,
            sentences_ph: batch.sentences,
            keep_prob_ph: 1.0
        }
        scores = logits_op.eval(session=sess, feed_dict=feed_dict)
        exporter.send(scores)
        n_questions += 1
        avep = average_precision(batch.labels, scores)
        total_avep += avep
        mrr = mean_reciprocal_rank(batch.labels, scores)
        total_mrr += mrr
    exporter.close()
    mean_avep = total_avep / n_questions
    mean_mrr = total_mrr / n_questions
    return mean_avep, mean_mrr
def get_scores(clf, X_t_train, y_train, X_t_test, y_test):
    clf.fit(X_t_train, y_train)
    y_score = clf.predict_proba(X_t_test)
    app = dict()
    score = fbeta_score(y_test, clf.predict(X_t_test), beta=2, average=None)
    #auc_score = roc_auc_score(y_test, clf.predict(X_t_test), average='samples')
    avg_sample_score = fbeta_score(y_test, clf.predict(X_t_test), beta=2, average='samples')
    prec_score = precision_score(y_test, clf.predict(X_t_test), average='micro')
    rec_score = recall_score(y_test, clf.predict(X_t_test), average='micro')
    avg_prec = average_precision_score(y_test, clf.predict(X_t_test))
    metrics = [score, avg_sample_score, roc_auc_score(y_test, clf.predict_proba(X_t_test))]
    #app['Classwise Scores'] = ([(mlb.classes_[l], score[l]) for l in score.argsort()[::-1]])
    fpr = dict()
    tpr = dict()
    roc_auc = dict()
    for i in range(len(list(enumerate(mlb.classes_)))):
        fpr[i], tpr[i], _ = roc_curve(y_test[:, i], y_score[:, i])
        roc_auc[mlb.classes_[i]] = auc(fpr[i], tpr[i])
    app['F2 Score'] = avg_sample_score
    app['ROC_AUC'] = roc_auc_score(y_test, clf.predict_proba(X_t_test))
    app['Classwise F2 Scores'] = ([(mlb.classes_[l], score[l]) for l in score.argsort()[::-1]])
    app['P_AUPR'] = avg_prec
    app['Precision'] = prec_score
    app['Recall'] = rec_score
    app['ROC_AUC_samples'] = roc_auc
    return app