def test_zero_precision_recall():
# Check that pathological cases do not bring NaNs
old_error_settings = np.seterr(all='raise')
try:
y_true = np.array([0, 1, 2, 0, 1, 2])
y_pred = np.array([2, 0, 1, 1, 2, 0])
assert_almost_equal(precision_score(y_true, y_pred,
average='weighted'), 0.0, 2)
assert_almost_equal(recall_score(y_true, y_pred, average='weighted'),
0.0, 2)
assert_almost_equal(f1_score(y_true, y_pred, average='weighted'),
0.0, 2)
finally:
np.seterr(**old_error_settings)
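A minimal standalone sketch of the same check (plain scikit-learn imports, toy labels): with these rotated labels every class has zero true positives, so all three weighted scores come out as exactly 0.0 rather than NaN.
import numpy as np
from sklearn.metrics import precision_score, recall_score, f1_score

y_true = np.array([0, 1, 2, 0, 1, 2])
y_pred = np.array([2, 0, 1, 1, 2, 0])  # every prediction is wrong
print(precision_score(y_true, y_pred, average='weighted'))  # 0.0
print(recall_score(y_true, y_pred, average='weighted'))     # 0.0
print(f1_score(y_true, y_pred, average='weighted'))         # 0.0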
Python f1_score() usage examples (source code)
def get_f1(scale):
global best_f1
# idx = np.random.choice(np.arange(len(crop)), 10000 if len(target)>10000 else len(target), replace=False)
idx = np.arange(len(target))
# pred = cnn.predict_proba((crop[idx])/scale, 1024, 0)
with warnings.catch_warnings():
warnings.simplefilter("ignore")
res = keras_utils.test_data_cnn_rnn((crop[idx])/scale, target, groups, cnn, rnn, verbose=0, only_lstm = True, cropsize=0)
f1 = res[3]
acc = res[2]
# f1_score(np.argmax(target[idx],1), np.argmax(pred,1), average='macro')
print(acc, f1)
return -acc  # note: despite the name, this returns negative accuracy, presumably for a minimizer
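The warnings.catch_warnings block above is presumably there to silence warnings such as scikit-learn's UndefinedMetricWarning when some class never gets predicted; a standalone sketch of that pattern with toy labels (not the project's data):
import warnings
from sklearn.exceptions import UndefinedMetricWarning
from sklearn.metrics import f1_score

y_true = [0, 1, 2, 2]
y_pred = [0, 1, 1, 1]  # class 2 is never predicted, so its F-score is ill-defined
warnings.filterwarnings("ignore", category=UndefinedMetricWarning)
print(f1_score(y_true, y_pred, average='macro'))  # runs silently; the missing class contributes 0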
def plot_results_per_patient(predictions, targets, groups, title='Results per Patient', fname='results_pp.png'):
assert len(predictions) == len(targets), '{} predictions, {} targets'.format(len(predictions), len(targets))
IDs = np.unique(groups)
f1s = []
accs = []
if predictions.ndim == 2: predictions = np.argmax(predictions,1)
if targets.ndim == 2: targets = np.argmax(targets,1)
statechanges = []
for ID in IDs:
y_true = targets [groups==ID]
y_pred = predictions[groups==ID]
f1 = f1_score(y_true, y_pred, average='macro')
acc = accuracy_score(y_true, y_pred)
f1s.append(f1)
accs.append(acc)
statechanges.append(np.sum(y_true != np.roll(y_true, 1)) - 1)
if fname != '':plt.figure()
plt.plot(f1s,'go')
plt.plot(accs,'bo')
if np.min(f1s) > 0.5:
plt.ylim([0.5,1])
plt.legend(['F1', 'Acc'])
plt.xlabel('Patient')
plt.ylabel('Score')
if fname != '':
title = title + '\nMean Acc: {:.1f} mean F1: {:.1f}'.format(accuracy_score(targets, predictions)*100,f1_score(targets,predictions, average='macro')*100)
plt.title(title)
# plt.tight_layout()
if fname!='':
plt.savefig(os.path.join('plots', fname))
return (accs,f1s, statechanges)
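A hedged usage sketch with synthetic data (three hypothetical patients, five classes); the real predictions/targets/groups come from the model pipeline, and the function itself relies on module-level imports of numpy, matplotlib, os and sklearn.metrics. Passing fname='' skips writing a file:
import numpy as np

rng = np.random.RandomState(0)
groups = np.repeat([1, 2, 3], 50)        # three hypothetical patients
targets = rng.randint(0, 5, size=150)    # five classes (e.g. sleep stages)
predictions = targets.copy()
flip = rng.rand(150) < 0.2               # corrupt roughly 20% of the predictions
predictions[flip] = rng.randint(0, 5, size=flip.sum())
accs, f1s, statechanges = plot_results_per_patient(predictions, targets, groups, fname='')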
def test_data_cnn_rnn(data, target, groups, cnn, rnn, layername='fc1', cropsize=2800, verbose=1, only_lstm = False):
"""
mode = 'scores' or 'preds'
take two ready trained models (cnn+rnn)
test on input data and return acc+f1
"""
if target.ndim==2: target = np.argmax(target,1)
if cropsize != 0:
diff = (data.shape[1] - cropsize)//2
data = data[:, diff:-diff, :]
with warnings.catch_warnings():
warnings.simplefilter("ignore")
if not only_lstm:
cnn_pred = cnn.predict_classes(data, 1024,verbose=0)
else:
cnn_pred = target
features = get_activations(cnn, data, layername, verbose=verbose)
cnn_acc = accuracy_score(target, cnn_pred)
cnn_f1 = f1_score(target, cnn_pred, average='macro')
seqlen = rnn.input_shape[1]
features_seq, target_seq, groups_seq = tools.to_sequences(features, target, seqlen=seqlen, groups=groups)
new_targ_seq = np.roll(target_seq, 4)
rnn_pred = rnn.predict_classes(features_seq, 1024, verbose=0)
rnn_acc = accuracy_score(new_targ_seq, rnn_pred)
rnn_f1 = f1_score(new_targ_seq,rnn_pred, average='macro')
confmat = confusion_matrix(new_targ_seq, rnn_pred)
return [cnn_acc, cnn_f1, rnn_acc, rnn_f1, confmat, (rnn_pred, target_seq, groups_seq)]
#%%
def computeF1(preds, true_y):
try:
if (1 not in true_y) or (1 not in preds):
# F-score is ill-defined when there are no true samples
# F-score is ill-defined when there are no predicted samples.
return np.nan
return f1_score(true_y, preds)
except Exception:
return np.nan
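For illustration, computeF1 returns NaN instead of a misleading 0.0 whenever the positive class is missing from either vector (toy inputs; note the argument order is preds first, then true_y):
preds = [1, 0, 0, 1]
true_y = [1, 0, 1, 1]
print(computeF1(preds, true_y))         # ordinary binary F1 (0.8 here)
print(computeF1([0, 0, 0], [0, 0, 0]))  # no positives anywhere -> nan
print(computeF1([0, 0, 0], [0, 1, 0]))  # positives exist but none predicted -> nan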
#The precision is the ratio tp / (tp + fp) where tp is the number of
#true positives and fp the number of false positives.
def run_model(model):
'''Train model'''
# Call global variables
x_train, x_test, y_train, y_test = X_TRAIN, X_TEST, Y_TRAIN, Y_TEST
model.fit(x_train, y_train)
# make predictions for test data
y_pred = model.predict(x_test)
# Accuracy
acc = metrics.accuracy_score(y_test, y_pred)
print('Accuracy: %.2f%%' % (acc * 100.0))
# F1_score
# f1_score = metrics.f1_score(y_test, y_pred)
# print("F1_score: %.2f%%" % (f1_score * 100.0))
# AUC of ROC
fpr, tpr, _ = metrics.roc_curve(y_test, y_pred)
auc = metrics.auc(fpr, tpr)
print('AUC: %.3f' % (auc))
# Logs for each fold
crossvalidation_acc.append(acc)
crossvalidation_auc.append(auc)
if ARGS.m:
cnf_matrix = confusion_matrix(y_test, y_pred)
print(cnf_matrix)
np.set_printoptions(precision=2)
if ARGS.t == '2':
classes = np.asarray(['Spliced', 'Non-spliced'])
plot_confusion_matrix(cnf_matrix, classes=classes, normalize=True)
elif ARGS.t == '3':
classes = np.asarray(['Low', 'Medium', 'High'])
plot_confusion_matrix(cnf_matrix, classes=classes, normalize=True)
plt.show()
if ARGS.f:
feature_selection(imp=IMP, model=model)
print()
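One caveat: the AUC above is computed from hard 0/1 predictions, which collapses the ROC curve to a single operating point. A hedged sketch of the score-based variant, reusing run_model's local names and assuming a binary task and a classifier that exposes predict_proba:
from sklearn import metrics

y_score = model.predict_proba(x_test)[:, 1]       # class-1 probabilities instead of labels
fpr, tpr, _ = metrics.roc_curve(y_test, y_score)
print('AUC from scores: %.3f' % metrics.auc(fpr, tpr))
print('AUC from scores: %.3f' % metrics.roc_auc_score(y_test, y_score))  # equivalent shortcut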
def main():
import sys
import os.path as osp
import cPickle
import numpy as np
from sklearn import grid_search, metrics
from sklearn.ensemble import RandomForestClassifier
data_dir = sys.argv[1]
fet_list = load_list(osp.join(data_dir, 'c3d.list'))
pos_list = load_list(osp.join(data_dir, 'pos.urls'))
features = np.load(osp.join(data_dir, 'c3d.npy'))
fet_set = set(fet_list)
pos_idx = [fet_list.index(i) for i in pos_list if i in fet_set]
y = np.zeros(features.shape[0])
y[pos_idx] = 1
print 'n_pos', np.sum(y), 'n_neg', np.sum(1 - y)
params = {'n_estimators':[2, 4, 5, 6, 8, 10, 30]}
#params = {'n_estimators':[50, 70, 100, 120, 150, 200]}
scorer = metrics.make_scorer(lambda yt, yp: metrics.f1_score(yt, yp, pos_label=0))
clf = grid_search.GridSearchCV(RandomForestClassifier(n_estimators=2, n_jobs=4), params, scoring=scorer, cv=5)
clf.fit(features, y)
print clf.best_score_
print clf.best_estimator_
cPickle.dump(clf.best_estimator_, open(osp.join(data_dir, 'c3d-models-rfc.pkl'), 'w'))
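The cross_validation and grid_search modules used above were removed in scikit-learn 0.20. A hedged Python 3 sketch of the same search against the current API (features and y built as above; joblib replaces cPickle for persistence):
import joblib
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import f1_score, make_scorer
from sklearn.model_selection import GridSearchCV

params = {'n_estimators': [2, 4, 5, 6, 8, 10, 30]}
scorer = make_scorer(f1_score, pos_label=0)
clf = GridSearchCV(RandomForestClassifier(n_jobs=4), params, scoring=scorer, cv=5)
clf.fit(features, y)
print(clf.best_score_, clf.best_estimator_)
joblib.dump(clf.best_estimator_, 'c3d-models-rfc.pkl')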
def evaluate(best_processed_path, model):
"""
Evaluate model on splitted 10 percent testing set
"""
x_test_char, x_test_type, y_test = prepare_feature(best_processed_path, option='test')
y_predict = model.predict([x_test_char, x_test_type])
y_predict = (y_predict.ravel() > 0.5).astype(int)
f1score = f1_score(y_test, y_predict)
precision = precision_score(y_test, y_predict)
recall = recall_score(y_test, y_predict)
return f1score, precision, recall
def f1_score1(y_pred, y_true):
"""
Returns the weighted f1 score
@param y_pred is a 1D array-like object that represents the predicted values
@param y_true is also a 1D array-like object of the same length as `y_pred` and represents the true values
"""
check_inputs(y_pred, y_true)
return f1_score(y_true, y_pred, average='weighted')
def leave_one_out_report(combined_results):
""" Evaluate leave-one-out CV results from different methods.
Arguments:
combined_results: list of tuples of the form
(method_name, true_y_vector, predicted_probabilities_vector)
Note the vectors really do need to be numpy arrays.
Returns: formatted report as string
"""
###
# Unfortunate code duplication with tabulate_metrics here,
# to be resolved later
probability_metrics = [
('AUC', roc_auc_score),
('AP', metrics.average_precision_score)
]
binary_metrics = [
('F1', metrics.f1_score),
('MCC', metrics.matthews_corrcoef),
('precision', metrics.precision_score),
('recall', metrics.recall_score)
]
metric_results = {label: [] for label, _ in
probability_metrics + binary_metrics}
metric_results.update({'tn': [], 'fp': [], 'fn': [], 'tp': []})
for label, metric in probability_metrics:
for method, y_true, probabilities in combined_results:
metric_results[label].append(metric(y_true, probabilities))
for method, y_true, probabilities in combined_results:
y_pred = probabilities > 0.5
for label, metric in binary_metrics:
metric_results[label].append(metric(y_true, y_pred))
conf = zip(
('tn', 'fp', 'fn', 'tp'),
metrics.confusion_matrix(y_true, y_pred).flat
)
for label, n in conf:
metric_results[label].append(n)
index=[t[0] for t in combined_results]
table = pd.DataFrame(data=metric_results,
index=index)
report = table.to_string(float_format=lambda x: '%.3g' % x)
return report
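A hedged usage sketch with two fake "methods" (the probability vectors must be numpy arrays, as the docstring notes; the function itself assumes pandas as pd and sklearn.metrics are imported at module level):
import numpy as np

rng = np.random.RandomState(0)
y_true = rng.randint(0, 2, size=100)
good_probs = np.where(y_true == 1, 0.9, 0.1)  # a deliberately strong fake model
combined_results = [('good_model', y_true, good_probs), ('random_model', y_true, rng.rand(100))]
print(leave_one_out_report(combined_results))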
def f1(_, predictions_binary, labels, parameters):
return metrics.f1_score(labels, predictions_binary, **parameters)
def apply_metrics(self, inputs: List[Tuple[QASetting, List[Answer]]], tensors: Mapping[TensorPort, np.ndarray]) \
-> Mapping[str, float]:
qs = [q for q, a in inputs]
p_answers = self.reader.output_module(qs, *(tensors[p] for p in self.reader.output_module.input_ports))
f1 = exact_match = 0
for pa, (q, ass) in zip(p_answers, inputs):
ground_truth = [a.text for a in ass]
f1 += metric_max_over_ground_truths(f1_score, pa[0].text, ground_truth)
exact_match += metric_max_over_ground_truths(exact_match_score, pa[0].text, ground_truth)
return {"f1": f1, "exact": exact_match}
def apply_metrics(self, inputs: List[Tuple[QASetting, List[Answer]]], tensors: Mapping[TensorPort, np.ndarray]) \
-> Mapping[str, float]:
labels = tensors[self._target_index_port]
predictions = tensors[self._predicted_index_port]
labels_np = np.array(labels)
acc_exact = np.sum(np.equal(labels_np, predictions))
acc_f1 = metrics.f1_score(labels_np, predictions, average='macro') * labels_np.shape[0]  # scaled by batch size, presumably normalised by the caller
return {"F1_macro": acc_f1, "Accuracy": acc_exact}
def getScores(labels_true, labels_pred):
str2 = "Average Precision: "+ str(precision_score(labels_true, labels_pred, average='weighted'))+'\n'
str2 += "Average Recall: "+ str( recall_score(labels_true, labels_pred, average='weighted'))+'\n'
str2 += "Average F1-measure: "+ str( f1_score(labels_true, labels_pred, average='weighted'))+'\n'
str2 += "Accuracy score: "+ str( accuracy_score(labels_true, labels_pred))+'\n'
str2 += "Mean absolute error (sklearn) on the test set is:"+ str( mean_absolute_error(labels_true, labels_pred))+'\n'
str2 += "Average Mean absolute error, and per class (official): "+ str(mae(labels_true, labels_pred))+'\n'
str2 += "Average Mean absolute error (official): " + str(mae(labels_true, labels_pred)[1])+'\n'
print(str2)
return str2
def get_best_C(Xtrain, ytrain, Xdev, ydev):
"""
Find the best parameters on the dev set.
"""
best_f1 = 0
best_c = 0
labels = sorted(set(ytrain))
test_cs = [0.001, 0.0025, 0.005, 0.0075,
0.01, 0.025, 0.05, 0.075,
0.1, 0.25, 0.5, 0.75,
1, 2.5, 5, 7.5]
for i, c in enumerate(test_cs):
sys.stdout.write('\rRunning cross-validation: {0} of {1}'.format(i+1, len(test_cs)))
sys.stdout.flush()
clf = LogisticRegression(C=c)
h = clf.fit(Xtrain, ytrain)
pred = clf.predict(Xdev)
if len(labels) == 2:
dev_f1 = f1_score(ydev, pred, pos_label=1)
else:
dev_f1 = f1_score(ydev, pred, labels=labels, average='micro')
if dev_f1 > best_f1:
best_f1 = dev_f1
best_c = c
print()
print('Best F1 on dev data: {0:.3f}'.format(best_f1))
print('Best C on dev data: {0}'.format(best_c))
return best_c, best_f1
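A hedged toy invocation (random features, binary labels; the function itself relies on module-level imports of sys and sklearn's LogisticRegression and f1_score):
import numpy as np

rng = np.random.RandomState(42)
Xtrain, ytrain = rng.rand(200, 50), rng.randint(0, 2, size=200)
Xdev, ydev = rng.rand(80, 50), rng.randint(0, 2, size=80)
best_c, best_f1 = get_best_C(Xtrain, ytrain, Xdev, ydev)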
def get_best_C(Xtrain, ytrain, Xdev, ydev):
"""
Find the best parameters on the dev set.
"""
best_f1 = 0
best_c = 0
labels = sorted(set(ytrain))
test_cs = [0.001, 0.003, 0.006, 0.009,
0.01, 0.03, 0.06, 0.09,
0.1, 0.3, 0.6, 0.9,
1, 3, 6, 9,
10, 30, 60, 90]
for i, c in enumerate(test_cs):
sys.stdout.write('\rRunning cross-validation: {0} of {1}'.format(i+1, len(test_cs)))
sys.stdout.flush()
clf = LogisticRegression(C=c)
h = clf.fit(Xtrain, ytrain)
pred = clf.predict(Xdev)
if len(labels) == 2:
dev_f1 = f1_score(ydev, pred, pos_label=1)
else:
dev_f1 = f1_score(ydev, pred, labels=labels, average='micro')
if dev_f1 > best_f1:
best_f1 = dev_f1
best_c = c
print()
print('Best F1 on dev data: {0:.3f}'.format(best_f1))
print('Best C on dev data: {0}'.format(best_c))
return best_c, best_f1
def get_best_C(dataset):
"""
Find the best parameters on the dev set.
"""
best_f1 = 0
best_c = 0
labels = sorted(set(dataset._ytrain))
test_cs = [0.001, 0.003, 0.006, 0.009,
0.01, 0.03, 0.06, 0.09,
0.1, 0.3, 0.6, 0.9,
1, 3, 6, 9,
10, 30, 60, 90]
for i, c in enumerate(test_cs):
sys.stdout.write('\rRunning cross-validation: {0} of {1}'.format(i+1, len(test_cs)))
sys.stdout.flush()
clf = LogisticRegression(C=c)
h = clf.fit(dataset._Xtrain, dataset._ytrain)
pred = clf.predict(dataset._Xdev)
if len(labels) == 2:
dev_f1 = f1_score(dataset._ydev, pred, pos_label=1)
else:
dev_f1 = f1_score(dataset._ydev, pred, labels=labels, average='micro')
if dev_f1 > best_f1:
best_f1 = dev_f1
best_c = c
print()
print('Best F1 on dev data: {0:.3f}'.format(best_f1))
print('Best C on dev data: {0}'.format(best_c))
return best_c, best_f1
def f1(self):
return f1_score(self._y_true, self._y_pred, labels=self._labels,
pos_label=self._pos_label, average=self._average)
def f1_score_least_frequent(y_true, y_pred):
"""
Calculate the F1 score of the least frequent label/class in ``y_true`` for
``y_pred``.
:param y_true: The true/actual/gold labels for the data.
:type y_true: array-like of float
:param y_pred: The predicted/observed labels for the data.
:type y_pred: array-like of float
:returns: F1 score of the least frequent label
"""
least_frequent = np.bincount(y_true).argmin()
return f1_score(y_true, y_pred, average=None)[least_frequent]
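For illustration, a toy call (this helper assumes non-negative integer labels, since np.bincount is used, and the argmin index is used directly to index the per-class F1 array):
import numpy as np
from sklearn.metrics import f1_score

y_true = np.array([0, 0, 0, 0, 1, 1, 0, 0, 1, 0])  # class 1 is the least frequent
y_pred = np.array([0, 0, 0, 1, 1, 0, 0, 0, 1, 0])
print(f1_score_least_frequent(y_true, y_pred))  # F1 of class 1
print(f1_score(y_true, y_pred, average=None))   # per-class F1 for comparison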
evaluation.py — project: scik-learn-learn-Chinese-text-classider, author: chapzq77
def calculate_3result(actual, predict):
m_precision = metrics.precision_score(actual, predict, average='macro')
m_recall = metrics.recall_score(actual, predict, average='macro')
m_f1 = metrics.f1_score(actual, predict, average='macro')
print "Evaluation results:"
print "Precision: {0:.3f}".format(m_precision)
print "Recall: {0:.3f}".format(m_recall)
print "f1-score: {0:.3f}".format(m_f1)
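A toy invocation with hypothetical three-class labels, assuming sklearn's metrics module is imported as in the snippet; the function prints macro-averaged precision, recall and F1:
actual = [0, 1, 2, 2, 1, 0]
predict = [0, 2, 2, 2, 1, 1]
calculate_3result(actual, predict)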