def test_detector(self):
    dataset, user_vocab, system_vocab = self.Reader.get_dataset()
    _labels = self.Reader.get_labels()
    labels = [lb.label for lb in _labels]
    model = self.make_model(user_vocab, system_vocab)
    model_if = model.create_interface(self.buckets, self.TRAIN_DIR)
    train_x, test_x, train_t, test_t = train_test_split(dataset, labels, test_size=0.2, random_state=42)

    with tf.Session() as sess:
        detector = Detector(sess, model_if)
        detector.train(sess, train_x, train_t)
        y = [detector.predict(sess, p) for p in test_x]
        report = classification_report(test_t, y, target_names=DbdReader.get_label_names())
        print(report)
Python classification_report() examples

Source: test_with_dbd.py (project: DialogueBreakdownDetection2016, author: icoxfog417)
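All of the snippets on this page revolve around sklearn.metrics.classification_report. As a minimal self-contained reference, here is a sketch of the basic call with invented labels:

from sklearn.metrics import classification_report

# Invented toy data: three classes, six samples.
y_true = [0, 1, 2, 2, 1, 0]
y_pred = [0, 2, 2, 2, 0, 0]
print(classification_report(y_true, y_pred, target_names=["neg", "neu", "pos"]))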
def train_segmenter(self, data, targets, target_names, test=True):
    '''
    Trains a support vector machine classifier and returns the
    trained model, plus a test report if the test flag is on.
    '''
    X_train, X_test, y_train, y_test = train_test_split(data, targets,
                                                        test_size=0.2, random_state=42)
    svc = SVC(probability=True)
    if test:
        clf = svc.fit(X_train, y_train)
        pred = clf.predict(X_test)
        report = classification_report(y_test, pred,
                                       target_names=target_names)
        return clf, report
    else:
        clf = svc.fit(data, targets)
        return clf
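A quick way to exercise train_segmenter is with synthetic data. The sketch below is illustrative only: make_classification supplies the data, and segmenter stands in for whatever object owns the method.

from sklearn.datasets import make_classification

# Hypothetical usage; 'segmenter' is a placeholder for the owning object.
X, y = make_classification(n_samples=200, n_features=10, random_state=0)
clf, report = segmenter.train_segmenter(X, y, target_names=["background", "object"])
print(report)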
def backtestHistory(_initial_virtual_shares, _start_date, _stockcode, _interval, _train_batch_size=100):
    ZZZZ = Investor(_name='ZZZZ', _initial_virtual_shares=_initial_virtual_shares,
                    _start_date=_start_date, _stockcode=_stockcode, _interval=_interval,
                    _train_batch_size=_train_batch_size)
    total = ZZZZ.maxcnt - ZZZZ.now
    # pbar = ProgressBar(widgets=[' ', AnimatedMarker(), 'Predicting: ', Percentage()], maxval=total).start()
    while ZZZZ.now < ZZZZ.maxcnt:
        # pbar.update(ZZZZ.now)
        # time.sleep(0.01)
        ZZZZ.TradeNext(use_NN=False)
    # pbar.finish()
    print()
    print(classification_report(ZZZZ.TRUEY, ZZZZ.PREDY))
    f1 = f1_score(ZZZZ.TRUEY, ZZZZ.PREDY)
    accuracy = accuracy_score(ZZZZ.TRUEY, ZZZZ.PREDY)
    print("accuracy:", accuracy)
    print("f1:", f1)
    predROR = ZZZZ.getTotalROR()[0]
    realROR = ZZZZ.getTotalROR()[1]
    assert realROR != 0
    print('pred ROR:', predROR, '%', '\t|\treal ROR:', realROR, '%')
    return predROR, realROR, f1, accuracy, total, ZZZZ.TRAINERROR
def score_binary_classification(y, y_hat, report=True):
    """
    Create binary classification output
    :param y: true values
    :param y_hat: class 1 probabilities
    :param report: if True, print the report string
    :return: AUC score and the report string
    """
    # Convert probabilities to hard classes for the classification report.
    y_hat_class = [1 if x >= 0.5 else 0 for x in y_hat]

    report_string = "---Binary Classification Score--- \n"
    report_string += classification_report(y, y_hat_class)
    score = roc_auc_score(y, y_hat)
    report_string += "\nAUC = " + str(score)

    if report:
        print(report_string)
    return score, report_string
def score_multiclass_classification(y, y_hat, report=True):
    """
    Create multiclass classification score
    :param y: true values
    :param y_hat: predicted classes
    :param report: if True, print the report string
    :return: accuracy score and the report string
    """
    report_string = "---Multiclass Classification Score--- \n"
    report_string += classification_report(y, y_hat)
    score = accuracy_score(y, y_hat)
    report_string += "\nAccuracy = " + str(score)

    if report:
        print(report_string)
    return score, report_string
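Both helpers can be smoke-tested with toy arrays; the values below are invented, not from any real model:

# Binary: the second argument holds class-1 probabilities.
auc, _ = score_binary_classification([0, 1, 1, 0], [0.2, 0.9, 0.6, 0.4], report=False)

# Multiclass: the second argument holds hard class predictions.
acc, _ = score_multiclass_classification([0, 1, 2], [0, 2, 2], report=False)
print(auc, acc)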
def get_save_results(X_train, X_test, y_train, y_test, model, description, params=None):
    # Fit model and log experiment
    model.fit(X_train, y_train)
    predictions = model.predict(X_test)

    write = description + '\n'
    if hasattr(model, 'best_params_'):
        write += 'Best params: ' + str(model.best_params_) + '\n'
    if params:
        write += 'Params: ' + str(params) + '\n'
    write += 'Training Score: ' + str(model.score(X_train, y_train)) + '\n'
    write += 'Testing Score: ' + str(model.score(X_test, y_test)) + '\n'

    if description == 'NN':
        # Collapse one-hot targets back into categorical labels.
        y_test = pd.DataFrame(y_test).stack()
        y_test = pd.Series(pd.Categorical(y_test[y_test != 0].index.get_level_values(1)))

    write += str(classification_report(y_test, predictions)) + '\n'
    write += str(confusion_matrix(y_test, predictions)) + '\n'
    print(write)

    with open('notes/experiments', 'a') as f:
        f.write(write)
    return model
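A hypothetical call, using a grid-searched SVC so that best_params_ gets logged too (this assumes the notes/ directory already exists):

from sklearn.datasets import load_iris
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.svm import SVC

# Illustrative only: any estimator with fit/predict/score works here.
X, y = load_iris(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
grid = GridSearchCV(SVC(), {'C': [0.1, 1, 10]}, cv=3)
model = get_save_results(X_train, X_test, y_train, y_test, grid, 'SVC grid search')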
def test_RandomizedSearchCV():
    '''
    Use RandomizedSearchCV with LogisticRegression to tune C and multi_class.
    :return: None
    '''
    digits = load_digits()
    X_train, X_test, y_train, y_test = train_test_split(digits.data, digits.target,
                                                        test_size=0.25, random_state=0,
                                                        stratify=digits.target)
    # Sample C from an exponential distribution instead of a fixed grid.
    tuned_parameters = {'C': scipy.stats.expon(scale=100),
                        'multi_class': ['ovr', 'multinomial']}
    clf = RandomizedSearchCV(LogisticRegression(penalty='l2', solver='lbfgs', tol=1e-6),
                             tuned_parameters, cv=10, scoring="accuracy", n_iter=100)
    clf.fit(X_train, y_train)
    print("Best parameters set found:", clf.best_params_)
    print("Randomized Grid scores:")
    # grid_scores_ is the pre-0.18 scikit-learn API.
    for params, mean_score, scores in clf.grid_scores_:
        print("\t%0.3f (+/-%0.03f) for %s" % (mean_score, scores.std() * 2, params))
    print("Optimized Score:", clf.score(X_test, y_test))
    print("Detailed classification report:")
    y_true, y_pred = y_test, clf.predict(X_test)
    print(classification_report(y_true, y_pred))
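grid_scores_ disappeared in scikit-learn 0.18; on newer versions the same reporting loop would read from cv_results_, roughly as follows:

# Sketch for scikit-learn >= 0.18: cv_results_ replaces grid_scores_.
results = clf.cv_results_
for mean, std, params in zip(results['mean_test_score'],
                             results['std_test_score'],
                             results['params']):
    print("\t%0.3f (+/-%0.03f) for %s" % (mean, std * 2, params))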
def Score_to_threshold(clz, X_score, *, y=None, score=100, round_=4):
    """Threshold a sequence of scores and return the fraction that exceeds it.

    If true labels are given, also print macro precision and a classification
    report for the thresholded predictions.

    Attributes:
        X_score (Sequence[number]): - scores to threshold
        y (Sequence[number]): - true labels, optional
        score (number): - the threshold; above it counts as T, otherwise F
    """
    score_array = np.array(X_score)
    if y is not None:
        print(precision_score(y, (score_array > score), average='macro'))
        print(classification_report(y, (score_array > score)))
    return round(len(score_array[score_array > score]) / len(score_array),
                 round_)
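The clz parameter suggests the function is bound as a classmethod; a hypothetical call through an assumed Scorer class:

# 'Scorer' is an assumed owner class, not from the original project.
scores = [80, 120, 95, 130, 60]
y_true = [0, 1, 0, 1, 0]
# With score=100 the predictions are scores > 100, i.e. [F, T, F, T, F].
print(Scorer.Score_to_threshold(scores, y=y_true, score=100))  # 0.4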
def eval_perf(classification):
    y_true = []
    y_pred = []
    for (key, value) in classification.items():
        y_true.append(parse_class(key))
        y_pred.append(value)
        print_verbose("Classification pair: %s" % str((key, value)), 4)
        print_verbose("True classes: %s" % str(y_true), 5)
        print_verbose("Predicted classes: %s" % str(y_pred), 5)

    # Print results
    print_verbose("True classes: %s" % str(y_true), 2)
    print_verbose("Predicted classes: %s" % str(y_pred), 2)

    # Print metrics
    print_verbose("Confusion Matrix:", 0)
    print_verbose(metrics.confusion_matrix(y_true, y_pred), 0)
    print_verbose("Classification Report:", 0)
    print_verbose(metrics.classification_report(y_true, y_pred), 0)
def test_classification_report_multiclass_with_long_string_label():
    y_true, y_pred, _ = make_prediction(binary=False)
    labels = np.array(["blue", "green" * 5, "red"])
    y_true = labels[y_true]
    y_pred = labels[y_pred]

    expected_report = """\
                           precision    recall  f1-score   support

                     blue       0.83      0.79      0.81        24
greengreengreengreengreen       0.33      0.10      0.15        31
                      red       0.42      0.90      0.57        20

              avg / total       0.51      0.53      0.47        75
"""

    report = classification_report(y_true, y_pred)
    assert_equal(report, expected_report)
def __call__(self, sess, epoch, iteration, model, loss):
    if iteration == 0 and epoch % self.at_every_epoch == 0:
        total = 0
        correct = 0
        truth_all = []
        pred_all = []
        for values in self.batcher:
            total += len(values[-1])
            feed_dict = {}
            for i in range(0, len(self.placeholders)):
                feed_dict[self.placeholders[i]] = values[i]
            # values[-1] holds the labels as 3-length one-hot vectors;
            # argmax transforms them back into integer class ids.
            truth = np.argmax(values[-1], 1)
            predicted = sess.run(tf.arg_max(tf.nn.softmax(model), 1),
                                 feed_dict=feed_dict)
            correct += sum(truth == predicted)
            truth_all.extend(truth)
            pred_all.extend(predicted)
        print(classification_report(truth_all, pred_all,
                                    target_names=["NONE", "AGAINST", "FAVOR"], digits=4))
def report_cv(clf, fv_test, target_test):
    print("Best parameters set found on development set:")
    print()
    print(clf.best_params_)
    print()
    print("Grid scores on development set:")
    print()
    for params, mean_score, scores in clf.grid_scores_:
        print("%0.3f (+/-%0.03f) for %r"
              % (mean_score, scores.std() * 2, params))
    print()
    print("Detailed classification report:")
    print()
    print("The model is trained on the full development set.")
    print("The scores are computed on the full evaluation set.")
    print()
    target_true, target_pred = target_test, clf.predict(fv_test)
    print(classification_report(target_true, target_pred))
    print()
def on_epoch_end(self, epoch, logs={}):
    print("Generating Classification Report:")
    pred = np.argmax(self.model.predict(self.x_eval), axis=1)
    truth = np.argmax(self.y_eval, axis=1)
    target_names = [self.labels[i] for i in range(len(self.labels))]
    print(classification_report(truth, pred, target_names=target_names))
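on_epoch_end is the hook Keras calls after each training epoch. A minimal wrapper class around this method might look like the following sketch; the constructor and its arguments are assumptions, not the original project's code:

import numpy as np
from keras.callbacks import Callback
from sklearn.metrics import classification_report

class ReportCallback(Callback):  # hypothetical wrapper class
    def __init__(self, x_eval, y_eval, labels):
        super(ReportCallback, self).__init__()
        self.x_eval, self.y_eval, self.labels = x_eval, y_eval, labels

    def on_epoch_end(self, epoch, logs={}):
        # Compare argmax of predicted probabilities against one-hot truth.
        pred = np.argmax(self.model.predict(self.x_eval), axis=1)
        truth = np.argmax(self.y_eval, axis=1)
        print(classification_report(truth, pred, target_names=list(self.labels)))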
Source: evaluation.py (project: scik-learn-learn-Chinese-text-classider, author: chapzq77)
def predict_result_report(actual, predict, category):
    print(metrics.classification_report(actual, predict, target_names=category))
def train_test_equal():
    dataset_path = dpu.generate_equal_dataset()
    dataset = dpu.load(dataset_path)
    mm = SGDCModelManager()
    mm.x_train, mm.x_test, mm.y_train, mm.y_test = train_test_split(dataset['inputs'],
                                                                    dataset['outputs'],
                                                                    random_state=42)
    mm.train()
    score = mm.score()
    predictions = mm.predict(mm.x_test)  # hard class predictions, not probabilities
    print(score)
    print(classification_report(mm.y_test, predictions))
    return jsonify(status=200, score=score)
def Precision(clf):
    doc_class_predicted = clf.predict(x_test)
    print(np.mean(doc_class_predicted == y_test))  # fraction of correct predictions

    # Precision/recall curve over class-1 probabilities (scores, not hard labels).
    answer = clf.predict_proba(x_test)[:, 1]
    precision, recall, thresholds = precision_recall_curve(y_test, answer)

    report = answer > 0.5
    print(classification_report(y_test, report, target_names=['neg', 'pos']))
    print("--------------------")
    from sklearn.metrics import accuracy_score
    print('Accuracy: %.2f' % accuracy_score(y_test, doc_class_predicted))
def print_confusion_matrix(y_test, nb_predict_test):
    print("Confusion Matrix")
    print("{0}".format(metrics.confusion_matrix(y_test, nb_predict_test, labels=['malware', 'benign'])))
    print("")
    print("Classification Report")
    print(metrics.classification_report(y_test, nb_predict_test, labels=['malware', 'benign']))
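With string labels, the labels argument pins the row and column order of both outputs; a toy invocation with invented values:

y_test = ['malware', 'benign', 'malware', 'benign']
nb_predict_test = ['malware', 'malware', 'malware', 'benign']
print_confusion_matrix(y_test, nb_predict_test)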
Source: get_feature_from_model.py (project: Sensor-Specific-Hyperspectral-Image-Feature-Learning, author: MeiShaohui)
def get_metric(self):
    self.get_y_pred()
    # self.get_ip1()
    self.y_true = self.label
    self.y_pred = self.feature.argmax(1)
    self.classify_report = metrics.classification_report(self.y_true, self.y_pred)
    self.confusion_matrix = metrics.confusion_matrix(self.y_true, self.y_pred)
    self.overall_accuracy = metrics.accuracy_score(self.y_true, self.y_pred)
    # Per-class precision, then its mean as the average accuracy.
    self.acc_for_each_class = metrics.precision_score(self.y_true, self.y_pred, average=None)
    self.average_accuracy = np.mean(self.acc_for_each_class)
    print(metrics.accuracy_score(self.y_true, self.y_pred))
def evaluate(self, test_examples, test_labels):
    predictions = self.predict(test_examples)
    print(classification_report(test_labels, predictions))
def get_metrics(actual_labels_file, predict_labels_file):
    util.check_required_program_args([actual_labels_file, predict_labels_file])
    actual_labels_df = pd.read_csv(actual_labels_file, names=['image', 'label'], header=0)
    predict_labels_df = pd.read_csv(predict_labels_file, names=['image', 'label'], header=0)

    # Assumes an equal number of items in both files.
    assert (actual_labels_df['image'].count()) == predict_labels_df['image'].count()
    actual_labels_df = actual_labels_df.sort_values(by=['image'])
    predict_labels_df = predict_labels_df.sort_values(by=['image'])
    assert (list(actual_labels_df['image'].values) == list(predict_labels_df['image'].values))

    # After sorting on image, y_true and y_pred are aligned row for row.
    y_labels = actual_labels_df['image'].values
    y_true = actual_labels_df['label'].values
    y_pred = predict_labels_df['label'].values

    print("Confusion matrix:")
    print(confusion_matrix(y_true, y_pred))
    print("")
    print("Classification report:")
    print(classification_report(y_true, y_pred))
    accuracy = accuracy_score(y_true, y_pred)
    kappa = quadratic_weighted_kappa(y_true, y_pred)
    print('Accuracy: %.4f' % accuracy)
    print('Kappa: %.4f' % kappa)
    print("")