python类classification_report()的实例源码

ClassificationLogReg.py 文件源码 项目:AirTicketPredicting 作者: junlulocky 项目源码 文件源码 阅读 23 收藏 0 点赞 0 评论 0
def parameterChoosing(self):
        # Set the parameters by cross-validation
        tuned_parameters = [{'penalty': ['l1'],
                             'C': np.logspace(-5,5)},
                             {'penalty': ['l2'],
                              'C': np.logspace(-5,5)}]

        clf = GridSearchCV(linear_model.LogisticRegression(tol=1e-6), tuned_parameters, cv=5, scoring='precision_weighted')
        clf.fit(self.X_train, self.y_train.ravel())

        print "Best parameters set found on development set:\n"
        print clf.best_params_

        print "Grid scores on development set:\n"
        for params, mean_score, scores in clf.grid_scores_:
            print "%0.3f (+/-%0.03f) for %r\n" % (mean_score, scores.std() * 2, params)

        print "Detailed classification report:\n"
        y_true, y_pred = self.y_test, clf.predict(self.X_test)
        print classification_report(y_true, y_pred)
two_fold_validation.py 文件源码 项目:TrackToTrip 作者: ruipgil 项目源码 文件源码 阅读 23 收藏 0 点赞 0 评论 0
def score(train_labels, train_features, test_labels, test_features, save_file, use_tree=False):
    if use_tree:
        train_clf = Classifier(tree.DecisionTreeClassifier())
    else:
        train_clf = Classifier()

    print train_clf.clf
    print ''

    t_start = time.clock()
    train_clf.learn(train_features, train_labels)
    t_end = time.clock()
    if save_file:
        train_clf.save_to_file(open(save_file, 'w'))

    p_start = time.clock()
    predicted = train_clf.clf.predict(test_features)
    p_end = time.clock()

    test_labels_t = train_clf.labels.transform(test_labels)
    print classification_report(test_labels_t, predicted, target_names=train_clf.labels.classes_)
    print 'Training time: %fs' % (t_end - t_start)
    print 'Predicting time: %fs' % (p_end - p_start)
    print 'Mean squared error: %f' % mean_squared_error(test_labels_t, predicted)
    return train_clf.score(test_features, test_labels)
classifier.py 文件源码 项目:KATE 作者: hugochan 项目源码 文件源码 阅读 25 收藏 0 点赞 0 评论 0
def multiclass_classifier(X_train, Y_train, X_val, Y_val, X_test, Y_test, nb_epoch=200, batch_size=10, seed=7):
    clf = softmax_network(X_train.shape[1], Y_train.shape[1])
    clf.fit(X_train, Y_train,
                        epochs=nb_epoch,
                        batch_size=batch_size,
                        shuffle=True,
                        validation_data=(X_val, Y_val),
                        callbacks=[
                                    ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=3, min_lr=0.01),
                                    EarlyStopping(monitor='val_loss', min_delta=1e-5, patience=5, verbose=0, mode='auto'),
                        ]
                        )
    acc = clf.test_on_batch(X_test, Y_test)[1]
    # confusion matrix and precision-recall
    true = np.argmax(Y_test,axis=1)
    pred = np.argmax(clf.predict(X_test), axis=1)
    print confusion_matrix(true, pred)
    print classification_report(true, pred)
    return acc
classifier.py 文件源码 项目:UrbanSearch 作者: urbansearchTUD 项目源码 文件源码 阅读 27 收藏 0 点赞 0 评论 0
def metrics_equal():
    dataset_path = dpu.generate_equal_dataset()
    dataset = dpu.load(dataset_path)
    mm = SGDCModelManager()

    mm.x_train, mm.x_test, mm.y_train, mm.y_test = train_test_split(dataset['inputs'], dataset['outputs'], random_state=42)
    mm.train()
    predicts = mm.predict(mm.x_test)

    report = classification_report(mm.y_test, predicts)

    return jsonify(status=200, message=report)
ClassificationSVM.py 文件源码 项目:AirTicketPredicting 作者: junlulocky 项目源码 文件源码 阅读 22 收藏 0 点赞 0 评论 0
def parameterChoosing(self):
        # Set the parameters by cross-validation
        tuned_parameters = [{'kernel': ['rbf'],
                             'gamma': np.logspace(-4, 3, 30),
                             'C': [1e-3, 1e-2, 1e-1, 1, 10, 100, 1000]},
                             {'kernel': ['poly'],
                              'degree': [1, 2, 3, 4],
                              'C': [1e-3, 1e-2, 1e-1, 1, 10, 100, 1000],
                              'coef0': np.logspace(-4, 3, 30)},
                            {'kernel': ['linear'],
                             'C': [1e-3, 1e-2, 1e-1, 1, 10, 100, 1000]}]

        clf = GridSearchCV(svm.SVC(C=1), tuned_parameters, cv=5, scoring='precision_weighted')
        clf.fit(self.X_train, self.y_train.ravel())

        print "Best parameters set found on development set:\n"
        print clf.best_params_

        print "Grid scores on development set:\n"
        for params, mean_score, scores in clf.grid_scores_:
            print "%0.3f (+/-%0.03f) for %r\n" % (mean_score, scores.std() * 2, params)

        print "Detailed classification report:\n"
        y_true, y_pred = self.y_test, clf.predict(self.X_test)
        print classification_report(y_true, y_pred)
ClassificationKNN.py 文件源码 项目:AirTicketPredicting 作者: junlulocky 项目源码 文件源码 阅读 17 收藏 0 点赞 0 评论 0
def parameterChoosing(self):
        # Set the parameters by cross-validation
        tuned_parameters = [{'weights': ['uniform', 'distance'],
                             'n_neighbors': range(2,60)
                             }
                            ]


        clf = GridSearchCV(neighbors.KNeighborsClassifier(), tuned_parameters, cv=5, scoring='precision_weighted')
        clf.fit(self.X_train, self.y_train.ravel())

        print "Best parameters set found on development set:\n"
        print clf.best_params_

        print "Grid scores on development set:\n"
        for params, mean_score, scores in clf.grid_scores_:
            print "%0.3f (+/-%0.03f) for %r\n" % (mean_score, scores.std() * 2, params)

        print "Detailed classification report:\n"
        y_true, y_pred = self.y_test, clf.predict(self.X_test)
        print classification_report(y_true, y_pred)
ClassificationRandomForest.py 文件源码 项目:AirTicketPredicting 作者: junlulocky 项目源码 文件源码 阅读 23 收藏 0 点赞 0 评论 0
def parameterChoosing(self):
        # Set the parameters by cross-validation
        tuned_parameters = [{'max_depth': range(20,60),
                             'n_estimators': range(10,40),
                             'max_features': ['sqrt', 'log2', None]
                             }
                            ]

        clf = GridSearchCV(RandomForestClassifier(n_estimators=30), tuned_parameters, cv=5, scoring='precision_weighted')
        clf.fit(self.X_train, self.y_train.ravel())

        print "Best parameters set found on development set:\n"
        print clf.best_params_

        print "Grid scores on development set:\n"
        for params, mean_score, scores in clf.grid_scores_:
            print "%0.3f (+/-%0.03f) for %r\n" % (mean_score, scores.std() * 2, params)

        print "Detailed classification report:\n"
        y_true, y_pred = self.y_test, clf.predict(self.X_test)
        print classification_report(y_true, y_pred)
logistic_regression.py 文件源码 项目:monasca-analytics 作者: openstack 项目源码 文件源码 阅读 30 收藏 0 点赞 0 评论 0
def learn_structure(self, samples):
        X_train, X_train_label, X_test, X_test_label = \
            self._generate_train_test_sets(samples, 0.75)
        logger.info('Training with ' + str(len(X_train)) +
                    'samples; testing with ' + str(len(X_test)) + ' samples.')

        lr_detector = self._get_best_detector(X_train, X_train_label)
        Y_test = lr_detector.predict(X_test)

        num_anomalies = Y_test[Y_test == ANOMALY].size
        logger.info('Found ' + str(num_anomalies) +
                    ' anomalies in testing set')

        logger.info('Confusion Matrix: \n{}'.
                    format(classification_report(
                        X_test_label,
                        Y_test,
                        target_names=['no', 'yes'])))
        return lr_detector
svc.py 文件源码 项目:monasca-analytics 作者: openstack 项目源码 文件源码 阅读 26 收藏 0 点赞 0 评论 0
def learn_structure(self, samples):
        X_train, X_train_label, X_test, X_test_label = \
            self._generate_train_test_sets(samples, 0.75)
        logger.info('Training with ' + str(len(X_train)) +
                    'samples; testing with ' + str(len(X_test)) + ' samples.')

        svc_detector = self._get_best_detector(X_train, X_train_label)
        Y_test = svc_detector.predict(X_test)

        num_anomalies = Y_test[Y_test == ANOMALY].size
        logger.info('Found ' + str(num_anomalies) +
                    ' anomalies in testing set')

        logger.info('Confusion Matrix: \n{}'.
                    format(classification_report(
                        X_test_label,
                        Y_test,
                        target_names=['no', 'yes'])))
        return svc_detector
decision_tree.py 文件源码 项目:monasca-analytics 作者: openstack 项目源码 文件源码 阅读 22 收藏 0 点赞 0 评论 0
def learn_structure(self, samples):
        X_train, X_train_label, X_test, X_test_label = \
            self._generate_train_test_sets(samples, 0.75)
        logger.info('Training with ' + str(len(X_train)) +
                    'samples; testing with ' + str(len(X_test)) + ' samples.')

        dt_detector = self._get_best_detector(X_train, X_train_label)
        Y_test = dt_detector.predict(X_test)

        num_anomalies = Y_test[Y_test == ANOMALY].size
        logger.info('Found ' + str(num_anomalies) +
                    ' anomalies in testing set')

        logger.info('Confusion Matrix: \n{}'.
                    format(classification_report(
                        X_test_label,
                        Y_test,
                        target_names=['no', 'yes'])))
        return dt_detector
random_forest_classifier.py 文件源码 项目:monasca-analytics 作者: openstack 项目源码 文件源码 阅读 23 收藏 0 点赞 0 评论 0
def learn_structure(self, samples):
        X_train, X_train_label, X_test, X_test_label = \
            self._generate_train_test_sets(samples, 0.75)
        logger.info('Training with ' + str(len(X_train)) +
                    'samples; testing with ' + str(len(X_test)) + ' samples.')

        rf_detector = self._get_best_detector(X_train, X_train_label)
        Y_test = rf_detector.predict(X_test)

        num_anomalies = Y_test[Y_test == ANOMALY].size
        logger.info('Found ' + str(num_anomalies) +
                    ' anomalies in testing set')

        logger.info('Confusion Matrix: \n{}'.
                    format(classification_report(
                        X_test_label,
                        Y_test,
                        target_names=['no', 'yes'])))
        return rf_detector
evaluate_result.py 文件源码 项目:entity_binding 作者: JasperGuo 项目源码 文件源码 阅读 86 收藏 0 点赞 0 评论 0
def main(log_file, table_file):
    """
    :param log_file:
    :param table_file:
    :return:
    """
    tables = read_tables(table_file)
    table_dict = build_table_dict(tables)
    questions = read_log(log_file)
    truth = list()
    prediction = list()
    for q in questions:
        process(q, table_dict[q["tid"]])
        t, p = recalc_index(q)
        truth += t
        prediction += p

    file_base_name = os.path.basename(log_file)
    dirname = os.path.dirname(log_file)
    file = os.path.join(dirname, "processed_" + file_base_name)
    report = classification_report(truth, prediction, target_names=["PAT", "LIT", "TAB", "COL", "CELL"])
    save(questions, report, file)
model.py 文件源码 项目:traffic-v2 作者: vnetserg 项目源码 文件源码 阅读 22 收藏 0 点赞 0 评论 0
def score_model(model, data_test, labeler):
    '''
        ??????? ?????????????????? ??????,
        ?????? ? ??????????? ????? ??? ???????:
        ???????? ?????????, ???????? ??????? ?
        ???????? ??? ??????? ??????, ????????
        ? ????????????? ??????.
        ?????????:
            model - ????????? ??????
            data_test - ??????????? ???????
            labeler - LabelEncoder ?????? ???????
        ??????????:
            ??????
    '''
    X_test = data_test.drop(["proto"], axis=1)
    y_test = data_test["proto"]
    y_predicted = model.predict(X_test)

    true_labels = labeler.inverse_transform(y_test)
    predicted_labels = labeler.inverse_transform(y_predicted)

    print feature_importances_report(model, X_test.columns)
    print "\n", classification_report(true_labels, predicted_labels)
    print cross_class_report(true_labels, predicted_labels)
pipeline.py 文件源码 项目:RIDDLE 作者: jisungk 项目源码 文件源码 阅读 24 收藏 0 点赞 0 评论 0
def evaluate(y_test, y_test_proba, nb_classes, path):
    from riddle import roc # here so np can be seeded before run_pipeline() call

    y_pred = [np.argmax(p) for p in y_test_proba]

    print('Confusion matrix:')
    print(confusion_matrix(y_test, y_pred))
    print()

    print('Classification report:')
    print(classification_report(y_test, y_pred, digits=3))

    print('ROC AUC values:')
    roc_auc, fpr, tpr = roc.compute_roc(y_test, y_test_proba, 
        nb_classes=nb_classes)
    roc.save_plots(roc_auc, fpr, tpr, nb_classes=nb_classes, path=path)

    for l, r in roc_auc.items():
        print('  {}: {:.5f}'.format(l, r))
    print()

# ---------------------------- PUBLIC FUNCTIONS ------------------------------ #
behaviorView.py 文件源码 项目:OpinionSpam 作者: Coder-Yu 项目源码 文件源码 阅读 28 收藏 0 点赞 0 评论 0
def fitAndPredict(self):
        # classifier = LogisticRegression()
        # classifier.fit(self.trainingSet, self.trainingLabel)
        # pred_labels = classifier.predict(self.testSet)
        # print 'Logistic:'
        # print classification_report(self.testLabel, pred_labels)

        self.classifier = SVC()
        self.classifier.fit(self.trainingSet, self.trainingLabel)
        pred_labels = {}
        for user in self.testDict:
            pred_labels[user] = self.classifier.predict([[self.BDS[user]]])
        # print 'SVM:'
        # print classification_report(self.testLabel, pred_labels)

        # classifier = DecisionTreeClassifier(criterion='entropy')
        # classifier.fit(self.trainingSet, self.trainingLabel)
        # pred_labels = classifier.predict(self.testSet)
        # print 'Decision Tree:'
        # print classification_report(self.testLabel, pred_labels)
        # return self.trainingSet, self.trainingLabel, self.testSet, self.testLabel

        return pred_labels
unigram.py 文件源码 项目:OpinionSpam 作者: Coder-Yu 项目源码 文件源码 阅读 21 收藏 0 点赞 0 评论 0
def fitAndPredict(self):
        corpus = self.trainingSet+self.testSet
        dictionary = corpora.Dictionary(corpus)

        corpus = [dictionary.doc2bow(text) for text in corpus]
        text_matrix = gensim.matutils.corpus2dense(corpus, num_terms=len(dictionary.token2id)).T

        if PCA_Applied:
            pca = PCA(n_components=PCA_nComponents)
            text_matrix = pca.fit_transform(text_matrix)

        classifier = LogisticRegression()
        classifier.fit(text_matrix[0:len(self.trainingSet)], self.trainingLabel)
        pred_labels = classifier.predict(text_matrix[len(self.trainingSet):])
        print 'Logistic:'
        print classification_report(self.testLabel, pred_labels)

        classifier = SVC()
        classifier.fit(text_matrix[0:len(self.trainingSet)], self.trainingLabel)
        pred_labels = classifier.predict(text_matrix[len(self.trainingSet):])
        print 'SVM:'
        print classification_report(self.testLabel, pred_labels)
TF_IDF.py 文件源码 项目:OpinionSpam 作者: Coder-Yu 项目源码 文件源码 阅读 23 收藏 0 点赞 0 评论 0
def fitAndPredict(self):
        corpus = self.trainingSet+self.testSet
        dictionary = corpora.Dictionary(corpus)
        corpus = [dictionary.doc2bow(text) for text in corpus]
        model = models.TfidfModel(corpus)
        corpus = [text for text in model[corpus]]
        text_matrix = gensim.matutils.corpus2dense(corpus, num_terms=len(dictionary.token2id)).T

        if PCA_Applied:
            pca = PCA(n_components=PCA_nComponents)
            text_matrix = pca.fit_transform(text_matrix)

        classifier = LogisticRegression()
        classifier.fit(text_matrix[0:len(self.trainingSet)], self.trainingLabel)
        pred_labels = classifier.predict(text_matrix[len(self.trainingSet):])
        print 'Logistic:'
        print classification_report(self.testLabel, pred_labels)

        classifier = SVC()
        classifier.fit(text_matrix[0:len(self.trainingSet)], self.trainingLabel)
        pred_labels = classifier.predict(text_matrix[len(self.trainingSet):])
        print 'SVM:'
        print classification_report(self.testLabel, pred_labels)
DegreeSAD.py 文件源码 项目:OpinionSpam 作者: Coder-Yu 项目源码 文件源码 阅读 50 收藏 0 点赞 0 评论 0
def fitAndPredict(self):
        # classifier = LogisticRegression()
        # classifier.fit(self.trainingSet, self.trainingLabel)
        # pred_labels = classifier.predict(self.testSet)
        # print 'Logistic:'
        # print classification_report(self.testLabel, pred_labels)
        pred_labels = {}
        classifier = SVC()
        classifier.fit(self.trainingSet, self.trainingLabel)

        for user in self.testDict:
            pred_labels[user] = classifier.predict([[self.MUD[user], self.RUD[user], self.QUD[user]]])
        # print 'SVM:'
        # print classification_report(self.testLabel, pred_labels)
        return pred_labels

        # classifier = DecisionTreeClassifier(criterion='entropy')
        # classifier.fit(self.trainingSet, self.trainingLabel)
        # pred_labels = classifier.predict(self.testSet)
        # print 'Decision Tree:'
        # print classification_report(self.testLabel, pred_labels)
        # return self.trainingSet, self.trainingLabel, self.testSet, self.testLabel
model.py 文件源码 项目:wende 作者: h404bi 项目源码 文件源码 阅读 25 收藏 0 点赞 0 评论 0
def test_model(self, n_folds=10):
        """ ?? `??K-??????Stratified K-folds cross-validating?`
            ???????
        """
        logging.debug("testing model with {}-folds CV".format(n_folds))
        model = self.init_model()
        X = self.data.data
        y = self.data.target

        cv = cross_validation.StratifiedKFold(y, n_folds=n_folds, random_state=42)

        t0 = time()
        y_pred = cross_validation.cross_val_predict(model, X=X, y=y, n_jobs=-1, cv=cv)
        t = time() - t0
        print("=" * 52)
        print("time cost: {}".format(t))
        print()
        print("confusion matrix\n", metrics.confusion_matrix(y, y_pred))
        print()
        print("\t\taccuracy: {}".format(metrics.accuracy_score(y, y_pred)))
        print()
        print("\t\tclassification report")
        print("-" * 52)
        print(metrics.classification_report(y, y_pred))
main.py 文件源码 项目:MyCommentOnTensorFlowModel 作者: guotong1988 项目源码 文件源码 阅读 22 收藏 0 点赞 0 评论 0
def test(self):
        lenW = len(self.vectorizer.vocabulary_)
        W = 3*lenW
        Y_true = []
        Y_pred = []
        for i,line in enumerate(self.test_lines):
            if line['type'] == 'q':
                r = line['answer']
                id = line['id']-1
                indices = [idx for idx in range(i-id, i+1)]
                memory_list = self.L_test[indices]

                m_o1 = O_t([id], memory_list, self.s_Ot)
                m_o2 = O_t([id, m_o1], memory_list, self.s_Ot)

                bestVal = None
                best = None
                for w in self.vectorizer.vocabulary_:
                    val = self.sR([id, m_o1, m_o2], self.H[w], memory_list, self.V)
                    if bestVal is None or val > bestVal:
                        bestVal = val
                        best = w
                Y_true.append(r)
                Y_pred.append(best)
        print metrics.classification_report(Y_true, Y_pred)
learning_kernel.py 文件源码 项目:HappyCat 作者: sparktsao 项目源码 文件源码 阅读 20 收藏 0 点赞 0 评论 0
def MyEvaluation(y_test,predicted):
    def norm_me(x):
        if str(type(x)).find("int")>-1:
            return x
        zix = np.argmax(x)
        x1 = [0]*len(x)
        x1[zix] = 1
        return x1
    predicted = [norm_me(x) for x in predicted]
    predicted = np.array(predicted,dtype="uint8")

    target_names  = ['normal','malware']
    inv_map = {v: k for k, v in KLABEL.items()}
    target_names = [inv_map[x] for x in range(WORKING_KLABEL)]
    result = classification_report(y_test,predicted,target_names=target_names)
    print result

    averagelabel = 'binary'
    if B_MULTICLASS: averaegelabel = "macro"

    v_precision = precision_score(y_test,predicted, average=averagelabel)
    v_recall = recall_score(y_test,predicted, average=averagelabel)    

    (TP, FP, TN, FN) = perf_measure(y_test, predicted,KLABEL["malicious"])
    return v_precision,v_recall,TP, FP, TN, FN
stats.py 文件源码 项目:odin 作者: imito 项目源码 文件源码 阅读 26 收藏 0 点赞 0 评论 0
def classification_report(y_pred, y_true, labels):
  """
  Parameters
  ----------
  pass

  Return
  ------
  Classification report in form of string
  """
  from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
  # ====== validate labels ====== #
  labels = as_tuple(labels)
  target_names = [str(i) for i in labels]
  labels = list(range(0, len(labels)))
  # ====== create report ====== #
  s = ""
  s += "Accuracy: %f\n" % accuracy_score(y_true, y_pred, normalize=True)
  s += "Confusion matrix:\n"
  s += str(confusion_matrix(y_true, y_pred, labels=labels)) + '\n'
  s += "Report:\n"
  s += str(classification_report(y_true, y_pred, labels=labels, digits=3,
                                 target_names=target_names))
  return s
training.py 文件源码 项目:static-gesture-recognition 作者: windmark 项目源码 文件源码 阅读 30 收藏 0 点赞 0 评论 0
def splitValidateModel(self, visualizePredictions = False):
    (label_vector, input_vector) = loadData(self.featureFile)

    indexArray = range(0, len(input_vector))
    trainData, testData, trainLabels, expectedLabels, trainIndices, testIndices = \
      cross_validation.train_test_split(input_vector, label_vector, indexArray, test_size=(1.0 - self.percentSplit))

    kNNClassifier = neighbors.KNeighborsClassifier(self.n_neighbors, weights='distance')
    kNNClassifier.fit(trainData, trainLabels) 
    predictedLabels = kNNClassifier.predict(testData)

    print("Classification report for classifier %s:\n%s\n"
          % ('k-NearestNeighbour', metrics.classification_report(expectedLabels, predictedLabels)))
    print("Confusion matrix:\n%s" % metrics.confusion_matrix(expectedLabels, predictedLabels))
    print('Split Validation training :: Done.\n')

    if visualizePredictions:
      self.__visualizePredictedDataset__(input_vector, testIndices, predictedLabels, expectedLabels)
svm_clusterization_test.py 文件源码 项目:rbm_based_autoencoders_with_tensorflow 作者: ikhlestov 项目源码 文件源码 阅读 19 收藏 0 点赞 0 评论 0
def test_svm_estimator(estimator, notes, encodings_train, labels_train,
                       encodings_test, labels_test):
    t0 = time()
    estimator.fit(encodings_train, labels_train)
    print("Time cons: %.2fs, type: %s" % (time() - t0, notes))
    predicted = estimator.predict(encodings_test)
    accuracy = metrics.accuracy_score(labels_test, predicted)
    print("Accuracy: %.5f" % accuracy)
    report = metrics.classification_report(labels_test, predicted)
    print(report)
    prec_recall_f_score = metrics.precision_recall_fscore_support(
        labels_test, predicted)
    print('-' * 10)
    prec_recall_f_score_dict = {
        'prec': np.mean(prec_recall_f_score[0]),
        'recall': np.mean(prec_recall_f_score[1]),
        'f_score': np.mean(prec_recall_f_score[2])
    }
    return accuracy, prec_recall_f_score_dict
crf_task.py 文件源码 项目:keyphrase-extraction 作者: sagarchaturvedi1 项目源码 文件源码 阅读 20 收藏 0 点赞 0 评论 0
def classify(y_true, y_pred):

    lb = LabelBinarizer()
    y_true_combined = lb.fit_transform(list(chain.from_iterable(y_true)))
    y_pred_combined = lb.transform(list(chain.from_iterable(y_pred)))

    tagset = set(lb.classes_) - {'O'}
    tagset = sorted(tagset, key=lambda tag: tag.split('-', 1)[::-1])
    class_indices = {cls: idx for idx, cls in enumerate(lb.classes_)}

    return classification_report(
        y_true_combined,
        y_pred_combined,
        labels = [class_indices[cls] for cls in tagset],
        target_names = tagset,
    )
classifier.py 文件源码 项目:productner 作者: etano 项目源码 文件源码 阅读 25 收藏 0 点赞 0 评论 0
def evaluate(self, x_test, y_test, batch_size=256):
        """Evaluate classifier

        Args:
            x_test (np.array): 3D numpy array (n_samples, embedding_dim, tokenizer.max_sequence_length)
            y_test (np.array): 2D numpy array (n_samples, len(self.category_map))
            batch_size (int): Training batch size
        """
        print('Evaluating...')
        predictions_last_epoch = self.model.predict(x_test, batch_size=batch_size, verbose=1)
        predicted_classes = np.argmax(predictions_last_epoch, axis=1)
        target_names = ['']*len(self.category_map)
        for category in self.category_map:
            target_names[self.category_map[category]] = category
        y_val = np.argmax(y_test, axis=1)
        print(classification_report(y_val, predicted_classes, target_names=target_names, digits = 6))
train.py 文件源码 项目:rnn-sentiment-analysis 作者: kashizui 项目源码 文件源码 阅读 27 收藏 0 点赞 0 评论 0
def evaluate(args, model, data):
    train_predict = model.predict(data.trainX)
    print("TRAINING RESULTS")
    print(classification_report(
        [e[1] for e in data.trainY],
        [utils.get_sentiment(e[1]) for e in train_predict],
    ))
    print()

    test_predict = model.predict(data.valX)
    print("DEV RESULTS")
    print(classification_report(
        [e[1] for e in data.valY],
        [utils.get_sentiment(e[1]) for e in test_predict],
    ))
    print()

    if args['--evaluate-test']:
        test_predict = model.predict(data.testX)
        print("TEST RESULTS")
        print(classification_report(
            [e[1] for e in data.testY],
            [utils.get_sentiment(e[1]) for e in test_predict],
        ))
        print()
svm.py 文件源码 项目:dancedeets-monorepo 作者: mikelambert 项目源码 文件源码 阅读 30 收藏 0 点赞 0 评论 0
def eval_model(name, model, data):
    print '=' * 20
    print name, 'training'
    model.fit(data, train.target, sample_weight=sample_weights)
    print name, 'trained'

    predictions = model.predict(processed_test_data)
    print name, 'accuracy', np.mean(predictions == test.target)

    print(metrics.classification_report(test.target, predictions))
    print metrics.confusion_matrix(test.target, predictions)

    print name, 'f1 cross validation', cross_validation.cross_val_score(model, grammar_processed_data, train.target, scoring='f1')
    print name, 'precision cross validation', cross_validation.cross_val_score(
        model, grammar_processed_data, train.target, scoring='precision'
    )
    return model, predictions


# SVM need balance on input features, same ranges and variances and stuff like that
evaluation.py 文件源码 项目:lstm-crf-ner 作者: qfzxhy 项目源码 文件源码 阅读 24 收藏 0 点赞 0 评论 0
def bio_classification_report(y_gold,y_pred):
    #y_gold: [[],[],[]]
    #y_pred:
    lb = LabelBinarizer()
    y_gold_combined = lb.fit_transform(list(chain.from_iterable(y_gold)))
    y_pred_combined = lb.fit_transform(list(chain.from_iterable(y_pred)))

    tagset = set(lb.classes_) - {'O'}
    tagset = sorted(tagset,key=lambda tag: tag.split('-',1)[::-1])
    class_indices = {cls:idx for idx,cls in enumerate(lb.classes_)}
    return classification_report(
        y_gold_combined,
        y_pred_combined,
        labels=[class_indices[cls] for cls in tagset],
        target_names=tagset
    )
run_seq2seq.py 文件源码 项目:DialogueBreakdownDetection2016 作者: icoxfog417 项目源码 文件源码 阅读 20 收藏 0 点赞 0 评论 0
def classify():
    reader = DbdReader(DATA_DIR, TRAIN_PATH, target_for_vocabulary=TARGET_PATH, max_vocabulary_size=_vocab_size_, filter="140", threshold=0.6, clear_when_exit=False)
    reader.init()
    dataset, user_vocab, system_vocab = reader.get_dataset()

    labels = reader.get_labels()
    model = make_model(user_vocab, system_vocab)
    model_if = model.create_interface(_buckets_, TRAIN_DIR)

    train_x, test_x, train_t, test_t = train_test_split(dataset, labels, test_size=0.2, random_state=42)

    with tf.Session() as sess:
        detector = Detector(sess, model_if)
        detector.train(sess, train_x, train_t)
        y = [detector.predict(sess, p) for p in test_x]
        y = [lb for lb, prob in y]

    report = classification_report([lb.label for lb in test_t], y, target_names=DbdReader.get_label_names())
    print(report)


问题


面经


文章

微信
公众号

扫码关注公众号