def compute_entities_f1(gold_graph, pred_graph):
"""
Compute the agreement for the entity entailment graph, for each entity, and return the average
:param gold_graph: the first annotator's graph
:param pred_graph: the second annotator's graph
:return: the entity edges' mean F1 score
"""
# Get all the possible edges in the entity entailment graph
all_edges = {str(entity): set([(str(m1), str(m2))
for m1 in entity.mentions.values()
for m2 in entity.mentions.values() if m1 != m2])
for entity in gold_graph.entities.values() if len(entity.mentions) > 1}
# Get the binary predictions/gold for these edges
str_entities_gold = { entity : str(entity) for entity in gold_graph.entities.values() }
entity_entailments_gold = {str_entities_gold[entity]:
[1 if (m1, m2) in set(entity.entailment_graph.mentions_graph) else 0
for (m1, m2) in all_edges[str_entities_gold[entity]]]
for entity in gold_graph.entities.values() if str_entities_gold[entity] in all_edges.keys()}
str_entities_pred = { entity : str(entity) for entity in pred_graph.entities.values() }
entity_entailments_pred = {str_entities_pred[entity]:
[1 if (m1, m2) in set(entity.entailment_graph.mentions_graph) else 0
for (m1, m2) in all_edges[str_entities_pred[entity]]]
for entity in pred_graph.entities.values() if str_entities_pred[entity] in all_edges.keys()}
mutual_entities = list(set(entity_entailments_gold.keys()).intersection(entity_entailments_pred.keys()))
# If both graphs contain no entailments, the score should be one
f1 = np.mean([precision_recall_fscore_support(entity_entailments_gold[entity], entity_entailments_pred[entity],
average='binary')[2]
if np.sum(entity_entailments_gold[entity]) > 0 or np.sum(entity_entailments_pred[entity]) > 0 else 1.0
for entity in mutual_entities])
return f1
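To make the per-entity scoring above concrete, here is a minimal, self-contained sketch with hypothetical toy edge labels (gold_edges and pred_edges are stand-ins, not the annotators' real graphs): each shared entity contributes the binary F1 of its edge vectors, and an entity with no entailments in either graph counts as 1.0.
import numpy as np
from sklearn.metrics import precision_recall_fscore_support
def _toy_entity_f1():
    gold_edges = {'e1': [1, 0, 1, 0], 'e2': [0, 0, 0, 0]}  # hypothetical gold edge labels
    pred_edges = {'e1': [1, 0, 0, 0], 'e2': [0, 0, 0, 0]}  # hypothetical predicted edge labels
    scores = []
    for entity in set(gold_edges) & set(pred_edges):
        gold, pred = gold_edges[entity], pred_edges[entity]
        if sum(gold) > 0 or sum(pred) > 0:
            # F1 of the positive (entailment) class is element [2] of the returned tuple
            scores.append(precision_recall_fscore_support(gold, pred, average='binary')[2])
        else:
            scores.append(1.0)  # both graphs empty for this entity
    return np.mean(scores)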
Python examples of precision_recall_fscore_support()
def acc_f1_roc(gt, prob, pred):
acc = accuracy_score(gt, pred)*100.
acc_not_normed = accuracy_score(gt, pred, normalize=False)
f1 = f1_score(gt, pred)*100.
roc = roc_auc_score(gt, prob, average='macro')*100.
p, r, _, _ = precision_recall_fscore_support(gt, pred, average='binary')
# print p, r
return acc, acc_not_normed, f1, roc, p, r
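A hedged usage sketch for acc_f1_roc with hypothetical toy inputs; the imports below are assumptions about what the original module already provides.
import numpy as np
from sklearn.metrics import (accuracy_score, f1_score, roc_auc_score,
                             precision_recall_fscore_support)
gt = np.array([0, 1, 1, 0, 1])              # hypothetical ground truth
prob = np.array([0.2, 0.9, 0.6, 0.4, 0.3])  # hypothetical P(class 1)
pred = (prob >= 0.5).astype(int)
acc, acc_not_normed, f1, roc, p, r = acc_f1_roc(gt, prob, pred)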
def getScores(clf, X, y):
predictions = clf.predict(X)
scores = precision_recall_fscore_support(y, predictions, average='binary')
return scores
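A hedged usage sketch for getScores; the classifier and data are hypothetical stand-ins, and with average='binary' the returned tuple holds scalar precision, recall, and F1 plus a None support.
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import precision_recall_fscore_support
X = [[0.0], [0.2], [0.8], [1.0]]  # hypothetical features
y = [0, 0, 1, 1]                  # hypothetical binary labels
clf = LogisticRegression().fit(X, y)
precision, recall, f1, _ = getScores(clf, X, y)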
def test_single(data, label, model):
prediction = model.predict(data)
#return float(np.sum(prediction == label)) / len(label)
pre, rec, f1, support = metrics.precision_recall_fscore_support(label, prediction)
f1 = (100*sum(f1[1:] * support[1:])/sum(support[1:]))
return f1
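This and the following test_* helpers all collapse the per-class output of precision_recall_fscore_support into sum(f1[1:] * support[1:]) / sum(support[1:]), i.e. a support-weighted F1 over every class except class 0 (presumably a background/none label). A hedged check of that reduction on hypothetical labels:
import numpy as np
from sklearn.metrics import precision_recall_fscore_support
y_true = np.array([0, 0, 1, 1, 2, 2])  # hypothetical
y_pred = np.array([0, 1, 1, 1, 2, 0])  # hypothetical
_, _, f1, support = precision_recall_fscore_support(y_true, y_pred)
weighted_f1 = 100 * np.sum(f1[1:] * support[1:]) / np.sum(support[1:])
# same as average='weighted' restricted to the non-background labels
_, _, f1_w, _ = precision_recall_fscore_support(y_true, y_pred, labels=[1, 2],
                                                average='weighted')
assert np.isclose(weighted_f1, 100 * f1_w)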
def test_rating(data, label, model):
prediction = model.predict(data)
#return float(np.sum(prediction % len(loadFile.aspect_dic) == (label % len(loadFile.aspect_dic)))) / len(label)
prediction = prediction % len(loadFile.aspect_dic)
label = label % len(loadFile.aspect_dic)
pre, rec, f1, support = metrics.precision_recall_fscore_support(label, prediction)
f1 = (100*sum(f1[1:] * support[1:])/sum(support[1:]))
return f1
def test_aspect(data, label, model):
prediction = model.predict(data)
#return float(np.sum(prediction // len(loadFile.aspect_dic) == (label // len(loadFile.aspect_dic)))) / len(label)
prediction = prediction // len(loadFile.aspect_dic)
label = label // len(loadFile.aspect_dic)
pre, rec, f1, support = metrics.precision_recall_fscore_support(label, prediction)
f1 = (100*sum(f1[1:] * support[1:])/sum(support[1:]))
return f1
def test_mat(data, label, model):
prediction1 = model[0].predict(data)
prediction2 = model[1].predict(data)
#return float(np.logical_and(prediction1 == label[:, 0], prediction2 == label[:, 1]).sum()) / len(label)
label = label[:, 0] * 100 + label[:, 1]
prediction = prediction1 * 100 + prediction2
pre, rec, f1, support = metrics.precision_recall_fscore_support(label, prediction)
f1 = (100*sum(f1[1:] * support[1:])/sum(support[1:]))
return f1
def test(net_file, data_set, label_method, model='RNN', trees=None):
if trees is None:
trees = tree.load_all(data_set, label_method)
assert net_file is not None, "Must give model to test"
    print("Testing netFile %s" % net_file)
    with open(net_file, 'rb') as fid:
opts = pickle.load(fid)
_ = pickle.load(fid)
if model == 'RNTN':
nn = RNTN(opts.wvec_dim, opts.output_dim, opts.num_words, opts.minibatch)
elif model == 'RNN':
nn = RNN(opts.wvec_dim, opts.output_dim, opts.num_words, opts.minibatch)
elif opts.model == 'TreeLSTM':
nn = TreeLSTM(opts.wvec_dim, opts.mem_dim, opts.output_dim, opts.num_words, opts.minibatch, rho=opts.rho)
elif opts.model == 'TreeTLSTM':
nn = TreeTLSTM(opts.wvec_dim, opts.mem_dim, opts.output_dim, opts.num_words, opts.minibatch, rho=opts.rho)
else:
            raise ValueError('%s is not a valid neural network; supported models are RNTN, RNN, TreeLSTM, and TreeTLSTM' % opts.model)
nn.init_params()
nn.from_file(fid)
    print("Testing %s..." % model)
cost, correct, guess = nn.cost_and_grad(trees, test=True)
correct_sum = 0
    for i in range(0, len(correct)):
correct_sum += (guess[i] == correct[i])
confusion = [[0 for i in range(nn.output_dim)] for j in range(nn.output_dim)]
for i, j in zip(correct, guess): confusion[i][j] += 1
# makeconf(confusion)
pre, rec, f1, support = metrics.precision_recall_fscore_support(correct, guess)
#print "Cost %f, Acc %f" % (cost, correct_sum / float(len(correct)))
#return correct_sum / float(len(correct))
f1 = (100*sum(f1[1:] * support[1:])/sum(support[1:]))
    print("Cost %f, F1 %f, Acc %f" % (cost, f1, correct_sum / float(len(correct))))
return f1
def test_precision_recall_f1_score_binary():
# Test Precision Recall and F1 Score for binary classification task
y_true, y_pred, _ = make_prediction(binary=True)
# detailed measures for each class
p, r, f, s = precision_recall_fscore_support(y_true, y_pred, average=None)
assert_array_almost_equal(p, [0.73, 0.85], 2)
assert_array_almost_equal(r, [0.88, 0.68], 2)
assert_array_almost_equal(f, [0.80, 0.76], 2)
assert_array_equal(s, [25, 25])
# individual scoring function that can be used for grid search: in the
# binary class case the score is the value of the measure for the positive
# class (e.g. label == 1). This is deprecated for average != 'binary'.
assert_dep_warning = partial(assert_warns, DeprecationWarning)
for kwargs, my_assert in [({}, assert_no_warnings),
({'average': 'binary'}, assert_no_warnings),
({'average': 'micro'}, assert_dep_warning)]:
ps = my_assert(precision_score, y_true, y_pred, **kwargs)
assert_array_almost_equal(ps, 0.85, 2)
rs = my_assert(recall_score, y_true, y_pred, **kwargs)
assert_array_almost_equal(rs, 0.68, 2)
fs = my_assert(f1_score, y_true, y_pred, **kwargs)
assert_array_almost_equal(fs, 0.76, 2)
assert_almost_equal(my_assert(fbeta_score, y_true, y_pred, beta=2,
**kwargs),
(1 + 2 ** 2) * ps * rs / (2 ** 2 * ps + rs), 2)
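As a side note, the identity asserted above is just the F-beta definition, F_beta = (1 + beta^2) * P * R / (beta^2 * P + R), with F1 as the beta = 1 case; a small hedged check on hypothetical labels:
from sklearn.metrics import precision_score, recall_score, fbeta_score
y_true = [0, 1, 1, 0, 1]  # hypothetical
y_pred = [0, 1, 0, 0, 1]  # hypothetical
p = precision_score(y_true, y_pred)
r = recall_score(y_true, y_pred)
beta = 2
assert abs(fbeta_score(y_true, y_pred, beta=beta)
           - (1 + beta ** 2) * p * r / (beta ** 2 * p + r)) < 1e-12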
def test_precision_recall_fscore_support_errors():
y_true, y_pred, _ = make_prediction(binary=True)
# Bad beta
assert_raises(ValueError, precision_recall_fscore_support,
y_true, y_pred, beta=0.0)
# Bad pos_label
assert_raises(ValueError, precision_recall_fscore_support,
y_true, y_pred, pos_label=2, average='macro')
# Bad average option
assert_raises(ValueError, precision_recall_fscore_support,
[0, 1, 2], [1, 2, 0], average='mega')
def test_precision_refcall_f1_score_multilabel_unordered_labels():
# test that labels need not be sorted in the multilabel case
y_true = np.array([[1, 1, 0, 0]])
y_pred = np.array([[0, 0, 1, 1]])
for average in ['samples', 'micro', 'macro', 'weighted', None]:
p, r, f, s = precision_recall_fscore_support(
y_true, y_pred, labels=[3, 0, 1, 2], warn_for=[], average=average)
assert_array_equal(p, 0)
assert_array_equal(r, 0)
assert_array_equal(f, 0)
if average is None:
assert_array_equal(s, [0, 1, 1, 0])
def test_precision_recall_f1_score_multiclass_pos_label_none():
# Test Precision Recall and F1 Score for multiclass classification task
# GH Issue #1296
# initialize data
y_true = np.array([0, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1])
y_pred = np.array([1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1])
# compute scores with default labels introspection
p, r, f, s = precision_recall_fscore_support(y_true, y_pred,
pos_label=None,
average='weighted')
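For reference, average='weighted' used above is just the support-weighted mean of the per-class scores that average=None returns; a hedged check on the same arrays:
import numpy as np
from sklearn.metrics import precision_recall_fscore_support
y_true = np.array([0, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1])
y_pred = np.array([1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1])
p_c, r_c, f_c, s_c = precision_recall_fscore_support(y_true, y_pred, average=None)
_, _, f_w, _ = precision_recall_fscore_support(y_true, y_pred, average='weighted')
assert np.isclose(f_w, np.average(f_c, weights=s_c))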
def test_precision_recall_f1_no_labels():
y_true = np.zeros((20, 3))
y_pred = np.zeros_like(y_true)
# tp = [0, 0, 0]
# fn = [0, 0, 0]
# fp = [0, 0, 0]
# support = [0, 0, 0]
# |y_hat_i inter y_i | = [0, 0, 0]
# |y_i| = [0, 0, 0]
# |y_hat_i| = [0, 0, 0]
for beta in [1]:
p, r, f, s = assert_warns(UndefinedMetricWarning,
precision_recall_fscore_support,
y_true, y_pred, average=None, beta=beta)
assert_array_almost_equal(p, [0, 0, 0], 2)
assert_array_almost_equal(r, [0, 0, 0], 2)
assert_array_almost_equal(f, [0, 0, 0], 2)
assert_array_almost_equal(s, [0, 0, 0], 2)
fbeta = assert_warns(UndefinedMetricWarning, fbeta_score,
y_true, y_pred, beta=beta, average=None)
assert_array_almost_equal(fbeta, [0, 0, 0], 2)
for average in ["macro", "micro", "weighted", "samples"]:
p, r, f, s = assert_warns(UndefinedMetricWarning,
precision_recall_fscore_support,
y_true, y_pred, average=average,
beta=beta)
assert_almost_equal(p, 0)
assert_almost_equal(r, 0)
assert_almost_equal(f, 0)
assert_equal(s, None)
fbeta = assert_warns(UndefinedMetricWarning, fbeta_score,
y_true, y_pred,
beta=beta, average=average)
assert_almost_equal(fbeta, 0)
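With all-zero targets and predictions every precision and recall term is 0/0, which is what triggers the UndefinedMetricWarning asserted above. A hedged sketch of the same case using the zero_division parameter (an assumption that scikit-learn >= 0.22 is available) to pick the substituted value without warning:
import numpy as np
from sklearn.metrics import precision_recall_fscore_support
y_true = np.zeros((20, 3))
y_pred = np.zeros_like(y_true)
p, r, f, s = precision_recall_fscore_support(y_true, y_pred, average='macro',
                                             zero_division=0)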
def clf_metrics(p_train, p_test, y_train, y_test):
""" Compute metrics on classifier predictions
Parameters
----------
p_train : np.array [n_samples]
predicted probabilities for training set
p_test : np.array [n_samples]
predicted probabilities for testing set
y_train : np.array [n_samples]
Training labels.
y_test : np.array [n_samples]
Testing labels.
Returns
-------
clf_scores : dict
        classifier scores for the training and testing sets
"""
y_pred_train = 1*(p_train >= 0.5)
y_pred_test = 1*(p_test >= 0.5)
train_scores = {}
test_scores = {}
train_scores['accuracy'] = metrics.accuracy_score(y_train, y_pred_train)
test_scores['accuracy'] = metrics.accuracy_score(y_test, y_pred_test)
train_scores['mcc'] = metrics.matthews_corrcoef(y_train, y_pred_train)
test_scores['mcc'] = metrics.matthews_corrcoef(y_test, y_pred_test)
(p, r, f, s) = metrics.precision_recall_fscore_support(y_train,
y_pred_train)
train_scores['precision'] = p
train_scores['recall'] = r
train_scores['f1'] = f
train_scores['support'] = s
(p, r, f, s) = metrics.precision_recall_fscore_support(y_test,
y_pred_test)
test_scores['precision'] = p
test_scores['recall'] = r
test_scores['f1'] = f
test_scores['support'] = s
train_scores['confusion matrix'] = \
metrics.confusion_matrix(y_train, y_pred_train, labels=[0, 1])
test_scores['confusion matrix'] = \
metrics.confusion_matrix(y_test, y_pred_test, labels=[0, 1])
train_scores['auc score'] = \
metrics.roc_auc_score(y_train, p_train + 1, average='weighted')
test_scores['auc score'] = \
metrics.roc_auc_score(y_test, p_test + 1, average='weighted')
clf_scores = {'train': train_scores, 'test': test_scores}
return clf_scores
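A hedged usage sketch for clf_metrics; the probabilities and labels below are synthetic stand-ins for the outputs of a trained classifier, and the imports mirror what the original module is assumed to provide.
import numpy as np
from sklearn import metrics
rng = np.random.RandomState(0)
y_train = rng.randint(0, 2, size=100)          # hypothetical training labels
y_test = rng.randint(0, 2, size=50)            # hypothetical test labels
p_train = y_train * 0.6 + rng.rand(100) * 0.4  # hypothetical predicted probabilities
p_test = y_test * 0.6 + rng.rand(50) * 0.4
scores = clf_metrics(p_train, p_test, y_train, y_test)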
def melodiness_metrics(m_train, m_test, y_train, y_test):
""" Compute metrics on melodiness score
Parameters
----------
m_train : np.array [n_samples]
melodiness scores for training set
m_test : np.array [n_samples]
melodiness scores for testing set
y_train : np.array [n_samples]
Training labels.
y_test : np.array [n_samples]
Testing labels.
Returns
-------
melodiness_scores : dict
        melodiness scores for the training and testing sets
"""
m_bin_train = 1*(m_train >= 1)
m_bin_test = 1*(m_test >= 1)
train_scores = {}
test_scores = {}
train_scores['accuracy'] = metrics.accuracy_score(y_train, m_bin_train)
test_scores['accuracy'] = metrics.accuracy_score(y_test, m_bin_test)
train_scores['mcc'] = metrics.matthews_corrcoef(y_train, m_bin_train)
test_scores['mcc'] = metrics.matthews_corrcoef(y_test, m_bin_test)
(p, r, f, s) = metrics.precision_recall_fscore_support(y_train,
m_bin_train)
train_scores['precision'] = p
train_scores['recall'] = r
train_scores['f1'] = f
train_scores['support'] = s
(p, r, f, s) = metrics.precision_recall_fscore_support(y_test,
m_bin_test)
test_scores['precision'] = p
test_scores['recall'] = r
test_scores['f1'] = f
test_scores['support'] = s
train_scores['confusion matrix'] = \
metrics.confusion_matrix(y_train, m_bin_train, labels=[0, 1])
test_scores['confusion matrix'] = \
metrics.confusion_matrix(y_test, m_bin_test, labels=[0, 1])
train_scores['auc score'] = \
metrics.roc_auc_score(y_train, m_train + 1, average='weighted')
test_scores['auc score'] = \
metrics.roc_auc_score(y_test, m_test + 1, average='weighted')
melodiness_scores = {'train': train_scores, 'test': test_scores}
return melodiness_scores
def calc_and_append_scores(y_test, y_pred, metrics, featImportance):
metrics['scores_mae'].append(mean_absolute_error(y_test, y_pred))
_, score_off = mae(y_test, y_pred)
metrics['scores_mae_official'].append(score_off)
prec, rec, fmeasure, _ = precision_recall_fscore_support(y_test, y_pred, average='macro')
metrics['scores_prec'].append(prec)
metrics['scores_recall'].append(rec)
metrics['scores_f1'].append(fmeasure)
metrics['scores_accuracy'].append(accuracy_score(y_test, y_pred))
metrics['feature_importance'].append(featImportance)
# Getting class-individual metrics
tTP = [0,0,0,0]
tFP = [0,0,0,0]
tTN = [0,0,0,0]
tFN = [0,0,0,0]
for act, pred in zip(y_test, y_pred):
if act == pred:
for i in range(0,4):
if i == act: #add to true positive
tTP[i] += 1
else: #add to true negative
tTN[i] += 1
else:
for i in range(0,4):
if i == act: #add to false negative
tFN[i] += 1
else: #add to false positive
tFP[i] += 1
tpre = [0,0,0,0]
trec = [0,0,0,0]
tfm = [0,0,0,0]
ttp = [0,0,0,0]
for i in range(0,4):
if (tTP[i] > 0.):
            tpre[i] = tTP[i] / float(tTP[i] + tFP[i])
            trec[i] = tTP[i] / float(tTP[i] + tFN[i])
if ((trec[i] > 0.) | (tpre[i] > 0.)):
tfm[i] = (2*(tpre[i] * trec[i])) / (tpre[i]+trec[i])
ttp[i] = tTP[i]
#for each label separately,
# to see how well our model performs on separate labels
metrics['indRec'].append(trec)
metrics['indPrec'].append(tpre)
metrics['indFmeasure'].append(tfm)
metrics['indTP'].append(ttp)
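The hand-rolled loop above accumulates per-class true/false positives and negatives for the 4-class task; as a hedged cross-check, its tpre/trec values correspond to what precision_recall_fscore_support returns with average=None (up to the zero-division guard applied when a class has no true positives). The toy labels here are hypothetical.
import numpy as np
from sklearn.metrics import precision_recall_fscore_support
y_test = np.array([0, 1, 2, 3, 0, 1, 2, 3])  # hypothetical
y_pred = np.array([0, 1, 2, 2, 1, 1, 2, 3])  # hypothetical
prec, rec, _, _ = precision_recall_fscore_support(y_test, y_pred,
                                                  labels=[0, 1, 2, 3], average=None)
# prec[i] and rec[i] are the per-class precision/recall the loop stores in tpre[i]/trec[i]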
def score(self, y_predicted, y_target, y_prob=None):
""" Compute metrics on classifier predictions
Parameters
----------
y_predicted : np.array [n_samples]
Predicted class labels
y_target : np.array [n_samples]
Target class labels
y_prob : np.array [n_samples] or None, default=None
            predicted probabilities. If None, auc is not computed
Returns
-------
scores : dict
dictionary of scores for the following metrics:
accuracy, matthews correlation coefficient, precision, recall, f1,
support, confusion matrix, auc score
"""
labels = set(y_target)
labels.update(y_predicted)
is_binary = len(labels) <= 2
scores = {}
scores['accuracy'] = metrics.accuracy_score(y_target, y_predicted)
if is_binary:
scores['mcc'] = metrics.matthews_corrcoef(y_target, y_predicted)
else:
scores['mcc'] = None
(scores['precision'],
scores['recall'],
scores['f1'],
scores['support']) = metrics.precision_recall_fscore_support(
y_target, y_predicted
)
scores['confusion matrix'] = metrics.confusion_matrix(
y_target, y_predicted, labels=list(labels)
)
if y_prob is not None:
scores['auc score'] = metrics.roc_auc_score(
y_target, y_prob + 1, average='weighted'
)
else:
scores['auc score'] = None
return scores
###############################################################################
def classification_report(y_true, y_pred, labels=None, sample_weight=None, digits=4, threshold=None):
# this function is copied from https://github.com/scikit-learn/scikit-learn/blob/412996f/sklearn/metrics/classification.py#L1341 (c) respective authors
# I pulled it here to fix formatting bug.
from sklearn.metrics import precision_recall_fscore_support, accuracy_score
y_true = np.array(y_true)
y_pred = np.array(y_pred)
if labels is None:
from sklearn.utils.multiclass import unique_labels
if threshold is not None:
y_true = y_true > threshold
y_pred = y_pred > threshold
labels = unique_labels(y_true, y_pred)
else:
labels = np.asarray(labels)
last_line_heading = 'avg / total'
target_names = ['%s' % l for l in labels]
results = [["", "precision", "recall", "f1-score", "support", "accuracy"]]
p, r, f1, s = precision_recall_fscore_support(y_true, y_pred,
labels=labels,
average=None,
sample_weight=sample_weight)
for i, label in enumerate(labels):
values = [target_names[i]]
for v in (p[i], r[i], f1[i]):
values += ["{0:0.{1}f}".format(v, digits)]
values += ["{0}".format(s[i])]
accuracy = accuracy_score(y_true == label, y_pred == label, sample_weight=sample_weight)
values += ["{0:0.{1}f}".format(accuracy, digits)]
results.append(values)
values = [last_line_heading]
for v in (np.average(p, weights=s),
np.average(r, weights=s),
np.average(f1, weights=s)):
values += ["{0:0.{1}f}".format(v, digits)]
values += ['{0}'.format(np.sum(s))]
accuracy = accuracy_score(y_true, y_pred, sample_weight=sample_weight)
values += ["{0:0.{1}f}".format(accuracy, digits)]
results.append(values)
return results
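A hedged usage sketch for the patched classification_report above; the labels and predictions are hypothetical, and note that it returns a list of rows rather than a formatted string.
import numpy as np
y_true = [0, 1, 2, 2, 1, 0, 2, 1]  # hypothetical
y_pred = [0, 2, 2, 2, 1, 0, 1, 1]  # hypothetical
rows = classification_report(y_true, y_pred, digits=3)
for row in rows:
    print('\t'.join(str(cell) for cell in row))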
def crossValidate(document_term_matrix,labels,classifier="SVM",nfold=2):
clf = None
    precision = []
    recall = []
    fscore = []
    a_score = []
if classifier == "NN":
        clf = MLPClassifier(hidden_layer_sizes=(50,), activation='relu', solver='sgd', alpha=1e-2, random_state=None)
elif classifier == "LR":
clf = linear_model.LogisticRegression(C=1e3)
#clf = tree.DecisionTreeClassifier()
if classifier == "RF":
clf = RandomForestClassifier()
elif classifier == "NB":
clf = GaussianNB()
elif classifier == "SVM":
clf = LinearSVC()
elif classifier == "KNN":
clf = NearestCentroid()
skf = StratifiedKFold(n_splits=nfold, shuffle=True)
y_test_total = []
y_pred_total = []
for train_index, test_index in skf.split(document_term_matrix, labels):
X_train, X_test = document_term_matrix[train_index], document_term_matrix[test_index]
y_train, y_test = labels[train_index], labels[test_index]
y_test_total.extend(y_test.tolist())
model = clf.fit(X_train, y_train)
y_pred = model.predict(X_test)
y_pred_total.extend(y_pred.tolist())
p,r,f,s = precision_recall_fscore_support(y_test, y_pred, average='weighted')
        print(accuracy_score(y_test, y_pred))
a_score.append(accuracy_score(y_test, y_pred))
precision.append(p)
recall.append(r)
fscore.append(f)
plot_learning_curve(clf, "Learning Curves", document_term_matrix, labels, ylim=None, cv=skf, n_jobs=1, train_sizes=np.linspace(.1, 1.0, 5))
plt.savefig('lc.png')
return pd.Series(y_test_total), pd.Series(y_pred_total), np.mean(precision),np.mean(recall),np.mean(fscore), np.mean(a_score)
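Finally, a hedged, self-contained sketch of the cross-validation pattern crossValidate implements (stratified folds, per-fold weighted precision/recall/F1, then the mean), using a hypothetical LinearSVC and synthetic data in place of the document-term matrix; the learning-curve plotting is omitted.
import numpy as np
from sklearn.datasets import make_classification
from sklearn.model_selection import StratifiedKFold
from sklearn.svm import LinearSVC
from sklearn.metrics import precision_recall_fscore_support, accuracy_score
X, y = make_classification(n_samples=200, n_features=20, n_informative=5,
                           n_classes=3, random_state=0)
clf = LinearSVC()
skf = StratifiedKFold(n_splits=2, shuffle=True, random_state=0)
precision, recall, fscore, a_score = [], [], [], []
for train_index, test_index in skf.split(X, y):
    clf.fit(X[train_index], y[train_index])
    y_pred = clf.predict(X[test_index])
    p, r, f, _ = precision_recall_fscore_support(y[test_index], y_pred, average='weighted')
    precision.append(p)
    recall.append(r)
    fscore.append(f)
    a_score.append(accuracy_score(y[test_index], y_pred))
print(np.mean(precision), np.mean(recall), np.mean(fscore), np.mean(a_score))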