from sklearn.model_selection import StratifiedShuffleSplit
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
import numpy as np

def validate(data, labels):
    '''
    Ten rounds of stratified shuffle-split evaluation.
    Assumes a module-level classifier `clf` (e.g. a scikit-learn estimator).
    '''
    accuracy_scores = []
    precision_scores = []
    recall_scores = []
    f1_scores = []
    sss = StratifiedShuffleSplit(n_splits=10)
    for train_index, test_index in sss.split(data, labels):
        x_train, x_test = data[train_index], data[test_index]
        y_train, y_test = labels[train_index], labels[test_index]
        clf.fit(x_train, y_train)
        y_pred = clf.predict(x_test)
        accuracy_scores.append(accuracy_score(y_test, y_pred))
        precision_scores.append(precision_score(y_test, y_pred))
        recall_scores.append(recall_score(y_test, y_pred))
        f1_scores.append(f1_score(y_test, y_pred))
    print('Accuracy', np.mean(accuracy_scores))
    print('Precision', np.mean(precision_scores))
    print('Recall', np.mean(recall_scores))
    print('F1-measure', np.mean(f1_scores))
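A minimal usage sketch (not part of the original snippet): validate() relies on a module-level classifier named clf, so one has to bind an estimator to that name before calling it; the breast-cancer dataset and LogisticRegression below are just stand-ins.

from sklearn.datasets import load_breast_cancer
from sklearn.linear_model import LogisticRegression

X, y = load_breast_cancer(return_X_y=True)    # binary toy problem
clf = LogisticRegression(max_iter=1000)       # module-level classifier expected by validate()
validate(X, y)                                # prints mean accuracy/precision/recall/F1 over 10 splits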
Python recall_score() usage examples (source code)
def classification_metrics(y, y_pred, threshold):
    metrics = {}
    metrics['threshold'] = threshold_from_predictions(y, y_pred, 0)
    metrics['np.std(y_pred)'] = np.std(y_pred)
    metrics['positive_frac_batch'] = float(np.count_nonzero(y == True)) / len(y)
    denom = np.count_nonzero(y == False)
    num = np.count_nonzero(np.logical_and(y == False, y_pred >= threshold))
    if denom > 0:
        metrics['fpr'] = float(num) / float(denom)
    if any(y) and not all(y):
        metrics['auc'] = roc_auc_score(y, y_pred)
    y_pred_bool = y_pred >= threshold
    if (any(y_pred_bool) and not all(y_pred_bool)):
        metrics['precision'] = precision_score(np.array(y, dtype=np.float32), y_pred_bool)
        metrics['recall'] = recall_score(y, y_pred_bool)
    return metrics
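A toy invocation, with assumptions flagged: threshold_from_predictions is a project-specific helper that does not appear on this page, so the stub below simply returns a fixed cut-off to make the call runnable.

import numpy as np
from sklearn.metrics import roc_auc_score, precision_score, recall_score

def threshold_from_predictions(y, y_pred, fpr_target):
    return 0.5  # placeholder stub for the project's real helper (assumption)

y = np.array([0, 0, 1, 1, 0, 1], dtype=bool)
y_pred = np.array([0.1, 0.4, 0.35, 0.8, 0.2, 0.9])
print(classification_metrics(y, y_pred, threshold=0.5))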
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import f1_score, confusion_matrix, precision_score, recall_score

def train_model_with_cv(model, params, X, y):
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20)
    # Use the training data for parameter selection in a grid search
    gs_clf = GridSearchCV(model, params, n_jobs=1, cv=5)
    gs_clf = gs_clf.fit(X_train, y_train)
    model = gs_clf.best_estimator_
    # Use the best model and the held-out test data for the final evaluation
    y_pred = model.predict(X_test)
    _f1 = f1_score(y_test, y_pred, average='micro')
    _confusion = confusion_matrix(y_test, y_pred)
    _precision = precision_score(y_test, y_pred)
    _recall = recall_score(y_test, y_pred)
    _statistics = {'f1_score': _f1,
                   'confusion_matrix': _confusion,
                   'precision': _precision,
                   'recall': _recall
                   }
    return model, _statistics
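One possible call site, shown purely as an illustration; the estimator and parameter grid below are arbitrary choices, not ones taken from the original project.

from sklearn.datasets import load_breast_cancer
from sklearn.svm import SVC

X, y = load_breast_cancer(return_X_y=True)    # binary labels, so the default scorers apply
params = {'C': [0.1, 1, 10], 'kernel': ['linear', 'rbf']}
best_model, stats = train_model_with_cv(SVC(), params, X, y)
print(stats['precision'], stats['recall'], stats['f1_score'])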
def metrics(self, X, y):
    metrics = {}
    y_pred_pair, loss = self.predict_proba_with_loss(X, y)
    y_pred = y_pred_pair[:, 1]  ## From softmax pair to prob of catastrophe
    metrics['loss'] = loss
    threshold = self.threshold_from_data(X, y)
    metrics['threshold'] = threshold
    metrics['np.std(y_pred)'] = np.std(y_pred)
    denom = np.count_nonzero(y == False)
    num = np.count_nonzero(np.logical_and(y == False, y_pred >= threshold))
    metrics['fpr'] = float(num) / float(denom)
    if any(y) and not all(y):
        metrics['auc'] = roc_auc_score(y, y_pred)
    y_pred_bool = y_pred >= threshold
    if (any(y_pred_bool) and not all(y_pred_bool)):
        metrics['precision'] = precision_score(np.array(y, dtype=np.float32), y_pred_bool)
        metrics['recall'] = recall_score(y, y_pred_bool)
    return metrics
Source: classifiers_score.py from project Stock-SentimentAnalysis (author: JoshuaMichaelKing)
def classifier_score(tp, classifier, train_list, test, test_tag):
    '''
    Train the given classifier, persist it to disk, and score it on the test set.
    Output: pos_precision, pos_recall, accuracy_score
    '''
    starttime = datetime.datetime.now()
    classifier = SklearnClassifier(classifier)
    classifier.train(train_list)
    iohelper.save_objects2pickle(classifier, './Reviews/' + tp + '.pkl')
    pred = classifier.classify_many(test)  # returns a list of predicted tags
    y_true = [1 if tag == 'pos' else 0 for tag in test_tag]
    y_pred = [1 if tag == 'pos' else 0 for tag in pred]
    pos_precision = precision_score(y_true, y_pred)
    pos_recall = recall_score(y_true, y_pred)
    endtime = datetime.datetime.now()
    interval = (endtime - starttime).microseconds
    interval = interval / 100
    return interval, pos_precision, pos_recall, accuracy_score(test_tag, pred)
#------------------------------------------------------------------------------
def evaluate(path):
    true = [int(pair[1] is None or gold[pair]) for pair in resources[path]]
    pred = [int(pair[1] is not None) for pair in resources[path]]
    tn, fp, fn, tp = confusion_matrix(true, pred).ravel()
    return {
        'tn': tn,
        'fp': fp,
        'fn': fn,
        'tp': tp,
        'precision': precision_score(true, pred),
        'recall': recall_score(true, pred),
        'f1': f1_score(true, pred),
        'scores': scores(resources[path])
    }
def evaluate(path):
    G = resources[path]
    pred = [int(has_sense_path(G, *pair)) for pair in union]
    tn, fp, fn, tp = confusion_matrix(true, pred).ravel()
    return {
        'tn': tn,
        'fp': fp,
        'fn': fn,
        'tp': tp,
        'precision': precision_score(true, pred),
        'recall': recall_score(true, pred),
        'f1': f1_score(true, pred),
        'scores': scores(G)
    }
def analyzeResult_temp(data, model, DataVecs):
    predict = model.predict(DataVecs)
    data['predict'] = predict
    print("Accuracy: %f %%" % (100. * sum(data["label"] == data["predict"]) / len(data["label"])))
    answer1 = data[data["label"] == 1]
    answer2 = data[data["label"] == 0]
    print("Positive Accuracy: %f %%" % (100. * sum(answer1["label"] == answer1["predict"]) / len(answer1["label"])))
    print("Negative Accuracy: %f %%" % (100. * sum(answer2["label"] == answer2["predict"]) / len(answer2["label"])))
    try:
        result_auc = model.predict_proba(DataVecs)
        print("Roc:%f\nAUPR:%f\n" % (roc_auc_score(data["label"], result_auc[:, 1]),
                                     average_precision_score(data["label"], result_auc[:, 1])))
        print("Precision:%f\nRecall:%f\nF1score:%f\nMCC:%f\n" % (precision_score(data["label"], data["predict"]),
                                                                 recall_score(data["label"], data["predict"]),
                                                                 f1_score(data["label"], data["predict"]),
                                                                 matthews_corrcoef(data["label"], data["predict"])))
    except Exception:
        print("ROC unavailable")

# Performance evaluation and result analysis using adjusted thresholds
def analyzeResult(data, model, DataVecs, threshold):
    predict = model.predict_proba(DataVecs)[:, 1]
    # cast booleans to 0/1 (replaces the original Python 2 `True, False = 1, 0` hack)
    data['predict'] = (predict > threshold).astype(int)
    print("Accuracy: %f %%" % (100. * sum(data["label"] == data["predict"]) / len(data["label"])))
    answer1 = data[data["label"] == 1]
    answer2 = data[data["label"] == 0]
    print("Positive Accuracy: %f %%" % (100. * sum(answer1["label"] == answer1["predict"]) / len(answer1["label"])))
    print("Negative Accuracy: %f %%" % (100. * sum(answer2["label"] == answer2["predict"]) / len(answer2["label"])))
    try:
        result_auc = model.predict_proba(DataVecs)
        print("Roc:%f\nAUPR:%f\n" % (roc_auc_score(data["label"], result_auc[:, 1]),
                                     average_precision_score(data["label"], result_auc[:, 1])))
        print("Precision:%f\nRecall:%f\nF1score:%f\nMCC:%f\n" % (precision_score(data["label"], data["predict"]),
                                                                 recall_score(data["label"], data["predict"]),
                                                                 f1_score(data["label"], data["predict"]),
                                                                 matthews_corrcoef(data["label"], data["predict"])))
    except Exception:
        print("ROC unavailable")

# Performance evaluation
def uar_score(labels: np.ndarray, predictions: np.ndarray):
    """
    Computes the unweighted average recall for the specified true labels and predictions.

    The unweighted average recall is simply the average recall for each class without any weighting.

    Parameters
    ----------
    labels: numpy.ndarray
        A one-dimensional numpy array containing the true labels of instances
    predictions: numpy.ndarray
        A one-dimensional numpy array containing the predicted labels of instances

    Returns
    -------
    float
        The unweighted average recall for the specified true labels and predictions
    """
    return recall_score(labels, predictions, average="macro")
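A quick sanity check of what average="macro" does here: the result is the plain mean of the per-class recalls, with no weighting by class frequency.

import numpy as np
from sklearn.metrics import recall_score

labels = np.array([0, 0, 0, 0, 1, 1])
predictions = np.array([0, 0, 0, 1, 1, 0])
print(recall_score(labels, predictions, average=None))  # per-class recalls: [0.75, 0.5]
print(uar_score(labels, predictions))                   # their unweighted mean: 0.625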
def MyEvaluation(y_test, predicted):
    def norm_me(x):
        # integer labels pass through; one-hot / probability vectors are argmax-ed to a one-hot vector
        if str(type(x)).find("int") > -1:
            return x
        zix = np.argmax(x)
        x1 = [0] * len(x)
        x1[zix] = 1
        return x1
    predicted = [norm_me(x) for x in predicted]
    predicted = np.array(predicted, dtype="uint8")
    target_names = ['normal', 'malware']
    inv_map = {v: k for k, v in KLABEL.items()}
    target_names = [inv_map[x] for x in range(WORKING_KLABEL)]
    result = classification_report(y_test, predicted, target_names=target_names)
    print(result)
    averagelabel = 'binary'
    if B_MULTICLASS:
        averagelabel = "macro"  # fixed typo: was `averaegelabel`, which silently left 'binary' in use
    v_precision = precision_score(y_test, predicted, average=averagelabel)
    v_recall = recall_score(y_test, predicted, average=averagelabel)
    (TP, FP, TN, FN) = perf_measure(y_test, predicted, KLABEL["malicious"])
    return v_precision, v_recall, TP, FP, TN, FN
def display_evaluation_metrics(true_labels, predicted_labels, positive_class=1):
    print('Accuracy:', np.round(
        metrics.accuracy_score(true_labels,
                               predicted_labels),
        2))
    print('Precision:', np.round(
        metrics.precision_score(true_labels,
                                predicted_labels,
                                pos_label=positive_class,
                                average='binary'),
        2))
    print('Recall:', np.round(
        metrics.recall_score(true_labels,
                             predicted_labels,
                             pos_label=positive_class,
                             average='binary'),
        2))
    print('F1 Score:', np.round(
        metrics.f1_score(true_labels,
                         predicted_labels,
                         pos_label=positive_class,
                         average='binary'),
        2))
def get_metrics(true_labels, predicted_labels):
    print('Accuracy:', np.round(
        metrics.accuracy_score(true_labels,
                               predicted_labels),
        2))
    print('Precision:', np.round(
        metrics.precision_score(true_labels,
                                predicted_labels,
                                average='weighted'),
        2))
    print('Recall:', np.round(
        metrics.recall_score(true_labels,
                             predicted_labels,
                             average='weighted'),
        2))
    print('F1 Score:', np.round(
        metrics.f1_score(true_labels,
                         predicted_labels,
                         average='weighted'),
        2))
def get_scores(clf, X_t_train, y_train, X_t_test, y_test):
    clf.fit(X_t_train, y_train)
    app = dict()
    score = fbeta_score(y_test, clf.predict(X_t_test), beta=2, average=None)
    avg_sample_score = fbeta_score(y_test, clf.predict(X_t_test), beta=2, average='samples')
    prec_score = precision_score(y_test, clf.predict(X_t_test), average='micro')
    rec_score = recall_score(y_test, clf.predict(X_t_test), average='micro')
    avg_prec = average_precision_score(y_test, clf.predict(X_t_test))
    metrics = [score, avg_sample_score, roc_auc_score(y_test, clf.predict_proba(X_t_test))]
    # app['Classwise Scores'] = ([(mlb.classes_[l], score[l]) for l in score.argsort()[::-1]])
    app['F2 Score'] = avg_sample_score
    app['ROC_AUC'] = roc_auc_score(y_test, clf.predict_proba(X_t_test))
    app['P_AUPR'] = avg_prec
    app['Precision'] = prec_score
    app['Recall'] = rec_score
    return app
def cv(feature_dict, feature, polarity, folds):
    kfold = KFold(len(polarity), n_folds=folds)  # old sklearn.cross_validation API
    count, f1, recall, precision, accuracy = 0, 0, 0, 0, 0
    for train, test in kfold:
        LR = LogisticRegression()
        count += 1
        x = [(feature[i]) for i in train]
        y = [(polarity[i]) for i in train]
        LR.fit(scipy.sparse.vstack(x), (y))
        test_label = []
        answer_label = [(polarity[j]) for j in test]
        for j in test:
            query = feature[j]
            if query.shape[1] != len(feature_dict):
                test_label.append(-1)  # feature-dimension mismatch: fall back to an invalid label
            else:
                test_label.append(int(predict(LR, query)[0]))
        accuracy += accuracy_score(answer_label, test_label)
        precision += precision_score(answer_label, test_label)
        recall += recall_score(answer_label, test_label)
        f1 += f1_score(answer_label, test_label)
        print('{}_fold finished.'.format(count))
    return accuracy, precision, recall, f1
def printResult(y_true, y_pred):
    acc = accuracy_score(y_true, y_pred)
    print("Accuracy: {:.4%}".format(acc))
    precision = metrics.precision_score(y_true, y_pred)
    recall = metrics.recall_score(y_true, y_pred)
    f1_score = metrics.f1_score(y_true, y_pred)
    confusion_matrix = metrics.confusion_matrix(y_true, y_pred)
    print("Precision:", precision)
    print("Recall:", recall)
    print("f1_score:", f1_score)
    print("confusion_matrix:")
    print(confusion_matrix)
    resultStr = "Precision: " + str(precision) + "\n" + \
                "Recall: " + str(recall) + "\n" + \
                "f1_score: " + str(f1_score) + "\n" + \
                "confusion_matrix" + "\n" + \
                str(confusion_matrix) + "\n"
    return resultStr
def compute_score(self, conf, hy):
    RS = recall_score(self.y, hy, average=None)
    conf['_all_f1'] = M = {str(self.le.inverse_transform([klass])[0]): f1 for klass, f1 in enumerate(f1_score(self.y, hy, average=None))}
    conf['_all_recall'] = {str(self.le.inverse_transform([klass])[0]): f1 for klass, f1 in enumerate(RS)}
    conf['_all_precision'] = N = {str(self.le.inverse_transform([klass])[0]): f1 for klass, f1 in enumerate(precision_score(self.y, hy, average=None))}
    conf['_macrorecall'] = np.mean(RS)
    if len(self.le.classes_) == 2:
        conf['_macrof1'] = np.mean(np.array([v for v in conf['_all_f1'].values()]))
        conf['_weightedf1'] = conf['_microf1'] = f1_score(self.y, hy, average='binary')
    else:
        conf['_macrof1'] = f1_score(self.y, hy, average='macro')
        conf['_microf1'] = f1_score(self.y, hy, average='micro')
        conf['_weightedf1'] = f1_score(self.y, hy, average='weighted')
    conf['_accuracy'] = accuracy_score(self.y, hy)
    if self.score.startswith('avgf1:'):
        _, k1, k2 = self.score.split(':')
        conf['_' + self.score] = (M[k1] + M[k2]) / 2
    elif self.score.startswith('avgf1f0:'):
        _, k1, k2 = self.score.split(':')
        pos = (M[k1] + N[k1]) / 2.
        neg = (M[k2] + N[k2]) / 2.
        conf['_' + self.score] = (pos + neg) / 2.
    conf['_score'] = conf['_' + self.score]
def evaluate_precision_recall(y, target, labels):
    import sklearn.metrics as metrics
    target = target[:len(y)]
    num_classes = max(target) + 1
    results = []
    for i in range(num_classes):
        class_target = _extract_single_class(i, target)
        class_y = _extract_single_class(i, y)
        results.append({
            'precision': metrics.precision_score(class_target, class_y),
            'recall': metrics.recall_score(class_target, class_y),
            'f1': metrics.f1_score(class_target, class_y),
            'fraction': sum(class_target) / len(target),
            '#of_class': int(sum(class_target)),
            'label': labels[i],
            'label_id': i
            # 'tp': tp
        })
        print('%d/%d' % (i, num_classes), results[-1])
    accuracy = metrics.accuracy_score(target, y)
    return accuracy, results
def test_ovr_multilabel_dataset():
    base_clf = MultinomialNB(alpha=1)
    for au, prec, recall in zip((True, False), (0.51, 0.66), (0.51, 0.80)):
        X, Y = datasets.make_multilabel_classification(n_samples=100,
                                                       n_features=20,
                                                       n_classes=5,
                                                       n_labels=2,
                                                       length=50,
                                                       allow_unlabeled=au,
                                                       random_state=0)
        X_train, Y_train = X[:80], Y[:80]
        X_test, Y_test = X[80:], Y[80:]
        clf = OneVsRestClassifier(base_clf).fit(X_train, Y_train)
        Y_pred = clf.predict(X_test)
        assert_true(clf.multilabel_)
        assert_almost_equal(precision_score(Y_test, Y_pred, average="micro"),
                            prec,
                            decimal=2)
        assert_almost_equal(recall_score(Y_test, Y_pred, average="micro"),
                            recall,
                            decimal=2)
def test_precision_recall_f_ignored_labels():
    # Test a subset of labels may be requested for PRF
    y_true = [1, 1, 2, 3]
    y_pred = [1, 3, 3, 3]
    y_true_bin = label_binarize(y_true, classes=np.arange(5))
    y_pred_bin = label_binarize(y_pred, classes=np.arange(5))
    data = [(y_true, y_pred),
            (y_true_bin, y_pred_bin)]
    for i, (y_true, y_pred) in enumerate(data):
        recall_13 = partial(recall_score, y_true, y_pred, labels=[1, 3])
        recall_all = partial(recall_score, y_true, y_pred, labels=None)
        assert_array_almost_equal([.5, 1.], recall_13(average=None))
        assert_almost_equal((.5 + 1.) / 2, recall_13(average='macro'))
        assert_almost_equal((.5 * 2 + 1. * 1) / 3,
                            recall_13(average='weighted'))
        assert_almost_equal(2. / 3, recall_13(average='micro'))
        # ensure the above were meaningful tests:
        for average in ['macro', 'weighted', 'micro']:
            assert_not_equal(recall_13(average=average),
                             recall_all(average=average))
def test_zero_precision_recall():
    # Check that pathological cases do not bring NaNs
    old_error_settings = np.seterr(all='raise')
    try:
        y_true = np.array([0, 1, 2, 0, 1, 2])
        y_pred = np.array([2, 0, 1, 1, 2, 0])
        assert_almost_equal(precision_score(y_true, y_pred,
                                            average='weighted'), 0.0, 2)
        assert_almost_equal(recall_score(y_true, y_pred, average='weighted'),
                            0.0, 2)
        assert_almost_equal(f1_score(y_true, y_pred, average='weighted'),
                            0.0, 2)
    finally:
        np.seterr(**old_error_settings)
def on_epoch_end(self, epoch, logs={}):
    import numpy as np
    from sklearn.metrics import recall_score, precision_score, roc_auc_score, f1_score
    y_pred = self.model.predict(self.X_val)
    y_pred = np.argmax(y_pred, axis=1)
    recall = recall_score(self.y_val, y_pred, average=None).mean()
    self.recall.append(recall)
    logs['recall'] = recall
    precision = precision_score(self.y_val, y_pred, average=None).mean()
    self.precision.append(precision)
    logs['precision'] = precision
    auc = roc_auc_score(self.y_val, y_pred, average=None).mean()
    self.auc.append(auc)
    logs['auc'] = auc
    f1 = f1_score(self.y_val, y_pred, average=None).mean()
    self.f1.append(f1)
    logs['f1'] = f1
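The callback class that owns this on_epoch_end is not shown on this page; the sketch below is one plausible wrapper (class name, constructor, and attribute names are assumptions) just to indicate how it could be wired into a Keras model.fit call.

from tensorflow.keras.callbacks import Callback

class ValidationMetrics(Callback):  # hypothetical wrapper, not from the original project
    def __init__(self, X_val, y_val):
        super().__init__()
        self.X_val, self.y_val = X_val, y_val
        self.recall, self.precision, self.auc, self.f1 = [], [], [], []

    # the on_epoch_end method shown above would be pasted here

# model.fit(X_train, y_train, epochs=10,
#           callbacks=[ValidationMetrics(X_val, y_val)])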
def test(self, data, session):
    ys_true = collections.deque([])
    ys_pred = collections.deque([])
    for batch in data:
        y_pred = tf.argmax(self.get_output(), 1)
        y_true = self.labels
        feed_dict = {self.labels: batch[0].root_labels}
        feed_dict.update(self.tree_lstm.get_feed_dict(batch[0]))
        y_pred, y_true = session.run([y_pred, y_true], feed_dict=feed_dict)
        ys_true += y_true.tolist()
        ys_pred += y_pred.tolist()
    ys_true = list(ys_true)
    ys_pred = list(ys_pred)
    score = metrics.accuracy_score(ys_true, ys_pred)
    print("Accuracy", score)
    # print("Recall", metrics.recall_score(ys_true, ys_pred))
    # print("f1_score", metrics.f1_score(ys_true, ys_pred))
    print("confusion_matrix")
    print(metrics.confusion_matrix(ys_true, ys_pred))
    return score