python类precision_recall_fscore_support()的实例源码-第2页-面圈网

entailment_graph.py 文件源码项目：OKR 作者: vered1986 项目源码文件源码阅读 17 收藏 0 点赞 0 评论 0

def compute_entities_f1(gold_graph, pred_graph):
    """
    Compute the agreement for the entity entailment graph, for each entity, and return the average
    :param gold_graph: the first annotator's graph
    :param pred_graph: the second annotator's graph
    :return: the entity edges' mean F1 score
    """

    # Get all the possible edges in the entity entailment graph
    all_edges = {str(entity): set([(str(m1), str(m2))
                                   for m1 in entity.mentions.values()
                                   for m2 in entity.mentions.values() if m1 != m2])
                 for entity in gold_graph.entities.values() if len(entity.mentions) > 1}

    # Get the binary predictions/gold for these edges
    str_entities_gold = { entity : str(entity) for entity in gold_graph.entities.values() }
    entity_entailments_gold = {str_entities_gold[entity]:
                                [1 if (m1, m2) in set(entity.entailment_graph.mentions_graph) else 0
                                 for (m1, m2) in all_edges[str_entities_gold[entity]]]
                            for entity in gold_graph.entities.values() if str_entities_gold[entity] in all_edges.keys()}

    str_entities_pred = { entity : str(entity) for entity in pred_graph.entities.values() }
    entity_entailments_pred = {str_entities_pred[entity]:
                                [1 if (m1, m2) in set(entity.entailment_graph.mentions_graph) else 0
                                 for (m1, m2) in all_edges[str_entities_pred[entity]]]
                            for entity in pred_graph.entities.values() if str_entities_pred[entity] in all_edges.keys()}

    mutual_entities = list(set(entity_entailments_gold.keys()).intersection(entity_entailments_pred.keys()))

    # If both graphs contain no entailments, the score should be one
    f1 = np.mean([precision_recall_fscore_support(entity_entailments_gold[entity], entity_entailments_pred[entity],
                                                  average='binary')[2]
                  if np.sum(entity_entailments_gold[entity]) > 0 or np.sum(entity_entailments_pred[entity]) > 0 else 1.0
                  for entity in mutual_entities])

    return f1

output_utils.py 文件源码项目：bird_audio_detection_challenge 作者: topel 项目源码文件源码阅读 37 收藏 0 点赞 0 评论 0

def acc_f1_roc(gt, prob, pred):

    acc = accuracy_score(gt, pred)*100.
    acc_not_normed = accuracy_score(gt, pred, normalize=False)
    f1 = f1_score(gt, pred)*100.
    roc = roc_auc_score(gt, prob, average='macro')*100.
    p, r, _, _ = precision_recall_fscore_support(gt, pred, average='binary')
    # print p, r
    return acc, acc_not_normed, f1, roc, p, r

run.py 文件源码项目：LeadQualifier 作者: xeneta 项目源码文件源码阅读 29 收藏 0 点赞 0 评论 0

def getScores(clf, X, y):
    predictions = clf.predict(X)
    scores = precision_recall_fscore_support(y, predictions, average='binary')
    return scores

run.py 文件源码项目：LeadQualifier 作者: xeneta 项目源码文件源码阅读 22 收藏 0 点赞 0 评论 0

def getScores(clf, X, y):
    predictions = clf.predict(X)
    scores = precision_recall_fscore_support(y, predictions, average='binary')
    return scores

# Import data

validation.py 文件源码项目：DRM 作者: JohnZhengHub 项目源码文件源码阅读 33 收藏 0 点赞 0 评论 0

def test_single(data, label, model):
    prediction = model.predict(data)
    #return float(np.sum(prediction == label)) / len(label)
    pre, rec, f1, support = metrics.precision_recall_fscore_support(label, prediction)
    f1 = (100*sum(f1[1:] * support[1:])/sum(support[1:]))
    return f1

validation.py 文件源码项目：DRM 作者: JohnZhengHub 项目源码文件源码阅读 54 收藏 0 点赞 0 评论 0

def test_rating(data, label, model):
    prediction = model.predict(data)
    #return float(np.sum(prediction % len(loadFile.aspect_dic) == (label % len(loadFile.aspect_dic)))) / len(label)
    prediction = prediction % len(loadFile.aspect_dic)
    label = label % len(loadFile.aspect_dic)
    pre, rec, f1, support = metrics.precision_recall_fscore_support(label, prediction)
    f1 = (100*sum(f1[1:] * support[1:])/sum(support[1:]))
    return f1

validation.py 文件源码项目：DRM 作者: JohnZhengHub 项目源码文件源码阅读 22 收藏 0 点赞 0 评论 0

def test_aspect(data, label, model):
    prediction = model.predict(data)
    #return float(np.sum(prediction // len(loadFile.aspect_dic) == (label // len(loadFile.aspect_dic)))) / len(label)
    prediction = prediction // len(loadFile.aspect_dic)
    label = label // len(loadFile.aspect_dic)
    pre, rec, f1, support = metrics.precision_recall_fscore_support(label, prediction)
    f1 = (100*sum(f1[1:] * support[1:])/sum(support[1:]))
    return f1

validation.py 文件源码项目：DRM 作者: JohnZhengHub 项目源码文件源码阅读 30 收藏 0 点赞 0 评论 0

def test_mat(data, label, model):
    prediction1 = model[0].predict(data)
    prediction2 = model[1].predict(data)
    #return float(np.logical_and(prediction1 == label[:, 0], prediction2 == label[:, 1]).sum()) / len(label)
    label = label[:, 0] * 100 + label[:, 1]
    prediction = prediction1 * 100 + prediction2
    pre, rec, f1, support = metrics.precision_recall_fscore_support(label, prediction)
    f1 = (100*sum(f1[1:] * support[1:])/sum(support[1:]))
    return f1

runNNet.py 文件源码项目：DRM 作者: JohnZhengHub 项目源码文件源码阅读 16 收藏 0 点赞 0 评论 0

def test(net_file, data_set, label_method, model='RNN', trees=None):
    if trees is None:
        trees = tree.load_all(data_set, label_method)
    assert net_file is not None, "Must give model to test"
    print "Testing netFile %s" % net_file
    with open(net_file, 'r') as fid:
        opts = pickle.load(fid)
        _ = pickle.load(fid)

        if model == 'RNTN':
            nn = RNTN(opts.wvec_dim, opts.output_dim, opts.num_words, opts.minibatch)
        elif model == 'RNN':
            nn = RNN(opts.wvec_dim, opts.output_dim, opts.num_words, opts.minibatch)
        elif opts.model == 'TreeLSTM':
            nn = TreeLSTM(opts.wvec_dim, opts.mem_dim, opts.output_dim, opts.num_words, opts.minibatch, rho=opts.rho)
        elif opts.model == 'TreeTLSTM':
            nn = TreeTLSTM(opts.wvec_dim, opts.mem_dim, opts.output_dim, opts.num_words, opts.minibatch, rho=opts.rho)
        else:
            raise '%s is not a valid neural network so far only RNTN, RNN, RNN2, RNN3, and DCNN' % opts.model

        nn.init_params()
        nn.from_file(fid)

    print "Testing %s..." % model

    cost, correct, guess = nn.cost_and_grad(trees, test=True)
    correct_sum = 0
    for i in xrange(0, len(correct)):
        correct_sum += (guess[i] == correct[i])

    confusion = [[0 for i in range(nn.output_dim)] for j in range(nn.output_dim)]
    for i, j in zip(correct, guess): confusion[i][j] += 1
    # makeconf(confusion)

    pre, rec, f1, support = metrics.precision_recall_fscore_support(correct, guess)
    #print "Cost %f, Acc %f" % (cost, correct_sum / float(len(correct)))
    #return correct_sum / float(len(correct))
    f1 = (100*sum(f1[1:] * support[1:])/sum(support[1:]))
    print "Cost %f, F1 %f, Acc %f" % (cost, f1, correct_sum / float(len(correct)))
    return f1

test_classification.py 文件源码项目：Parallel-SGD 作者: angadgill 项目源码文件源码阅读 34 收藏 0 点赞 0 评论 0

def test_precision_recall_f1_score_binary():
    # Test Precision Recall and F1 Score for binary classification task
    y_true, y_pred, _ = make_prediction(binary=True)

    # detailed measures for each class
    p, r, f, s = precision_recall_fscore_support(y_true, y_pred, average=None)
    assert_array_almost_equal(p, [0.73, 0.85], 2)
    assert_array_almost_equal(r, [0.88, 0.68], 2)
    assert_array_almost_equal(f, [0.80, 0.76], 2)
    assert_array_equal(s, [25, 25])

    # individual scoring function that can be used for grid search: in the
    # binary class case the score is the value of the measure for the positive
    # class (e.g. label == 1). This is deprecated for average != 'binary'.
    assert_dep_warning = partial(assert_warns, DeprecationWarning)
    for kwargs, my_assert in [({}, assert_no_warnings),
                              ({'average': 'binary'}, assert_no_warnings),
                              ({'average': 'micro'}, assert_dep_warning)]:
        ps = my_assert(precision_score, y_true, y_pred, **kwargs)
        assert_array_almost_equal(ps, 0.85, 2)

        rs = my_assert(recall_score, y_true, y_pred, **kwargs)
        assert_array_almost_equal(rs, 0.68, 2)

        fs = my_assert(f1_score, y_true, y_pred, **kwargs)
        assert_array_almost_equal(fs, 0.76, 2)

        assert_almost_equal(my_assert(fbeta_score, y_true, y_pred, beta=2,
                                      **kwargs),
                            (1 + 2 ** 2) * ps * rs / (2 ** 2 * ps + rs), 2)

test_classification.py 文件源码项目：Parallel-SGD 作者: angadgill 项目源码文件源码阅读 23 收藏 0 点赞 0 评论 0

def test_precision_recall_fscore_support_errors():
    y_true, y_pred, _ = make_prediction(binary=True)

    # Bad beta
    assert_raises(ValueError, precision_recall_fscore_support,
                  y_true, y_pred, beta=0.0)

    # Bad pos_label
    assert_raises(ValueError, precision_recall_fscore_support,
                  y_true, y_pred, pos_label=2, average='macro')

    # Bad average option
    assert_raises(ValueError, precision_recall_fscore_support,
                  [0, 1, 2], [1, 2, 0], average='mega')

test_classification.py 文件源码项目：Parallel-SGD 作者: angadgill 项目源码文件源码阅读 18 收藏 0 点赞 0 评论 0

def test_precision_refcall_f1_score_multilabel_unordered_labels():
    # test that labels need not be sorted in the multilabel case
    y_true = np.array([[1, 1, 0, 0]])
    y_pred = np.array([[0, 0, 1, 1]])
    for average in ['samples', 'micro', 'macro', 'weighted', None]:
        p, r, f, s = precision_recall_fscore_support(
            y_true, y_pred, labels=[3, 0, 1, 2], warn_for=[], average=average)
        assert_array_equal(p, 0)
        assert_array_equal(r, 0)
        assert_array_equal(f, 0)
        if average is None:
            assert_array_equal(s, [0, 1, 1, 0])

test_classification.py 文件源码项目：Parallel-SGD 作者: angadgill 项目源码文件源码阅读 20 收藏 0 点赞 0 评论 0

def test_precision_recall_f1_score_multiclass_pos_label_none():
    # Test Precision Recall and F1 Score for multiclass classification task
    # GH Issue #1296
    # initialize data
    y_true = np.array([0, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1])
    y_pred = np.array([1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1])

    # compute scores with default labels introspection
    p, r, f, s = precision_recall_fscore_support(y_true, y_pred,
                                                 pos_label=None,
                                                 average='weighted')

test_classification.py 文件源码项目：Parallel-SGD 作者: angadgill 项目源码文件源码阅读 23 收藏 0 点赞 0 评论 0

def test_precision_recall_f1_no_labels():
    y_true = np.zeros((20, 3))
    y_pred = np.zeros_like(y_true)

    # tp = [0, 0, 0]
    # fn = [0, 0, 0]
    # fp = [0, 0, 0]
    # support = [0, 0, 0]
    # |y_hat_i inter y_i | = [0, 0, 0]
    # |y_i| = [0, 0, 0]
    # |y_hat_i| = [0, 0, 0]

    for beta in [1]:
        p, r, f, s = assert_warns(UndefinedMetricWarning,
                                  precision_recall_fscore_support,
                                  y_true, y_pred, average=None, beta=beta)
        assert_array_almost_equal(p, [0, 0, 0], 2)
        assert_array_almost_equal(r, [0, 0, 0], 2)
        assert_array_almost_equal(f, [0, 0, 0], 2)
        assert_array_almost_equal(s, [0, 0, 0], 2)

        fbeta = assert_warns(UndefinedMetricWarning, fbeta_score,
                             y_true, y_pred, beta=beta, average=None)
        assert_array_almost_equal(fbeta, [0, 0, 0], 2)

        for average in ["macro", "micro", "weighted", "samples"]:
            p, r, f, s = assert_warns(UndefinedMetricWarning,
                                      precision_recall_fscore_support,
                                      y_true, y_pred, average=average,
                                      beta=beta)
            assert_almost_equal(p, 0)
            assert_almost_equal(r, 0)
            assert_almost_equal(f, 0)
            assert_equal(s, None)

            fbeta = assert_warns(UndefinedMetricWarning, fbeta_score,
                                 y_true, y_pred,
                                 beta=beta, average=average)
            assert_almost_equal(fbeta, 0)

clf_utils.py 文件源码项目：SourceFilterContoursMelody 作者: juanjobosch 项目源码文件源码阅读 19 收藏 0 点赞 0 评论 0

def clf_metrics(p_train, p_test, y_train, y_test):
    """ Compute metrics on classifier predictions

    Parameters
    ----------
    p_train : np.array [n_samples]
        predicted probabilities for training set
    p_test : np.array [n_samples]
        predicted probabilities for testing set
    y_train : np.array [n_samples]
        Training labels.
    y_test : np.array [n_samples]
        Testing labels.

    Returns
    -------
    clf_scores : dict
        classifier scores for training set
    """
    y_pred_train = 1*(p_train >= 0.5)
    y_pred_test = 1*(p_test >= 0.5)

    train_scores = {}
    test_scores = {}

    train_scores['accuracy'] = metrics.accuracy_score(y_train, y_pred_train)
    test_scores['accuracy'] = metrics.accuracy_score(y_test, y_pred_test)

    train_scores['mcc'] = metrics.matthews_corrcoef(y_train, y_pred_train)
    test_scores['mcc'] = metrics.matthews_corrcoef(y_test, y_pred_test)

    (p, r, f, s) = metrics.precision_recall_fscore_support(y_train,
                                                           y_pred_train)
    train_scores['precision'] = p
    train_scores['recall'] = r
    train_scores['f1'] = f
    train_scores['support'] = s

    (p, r, f, s) = metrics.precision_recall_fscore_support(y_test,
                                                           y_pred_test)
    test_scores['precision'] = p
    test_scores['recall'] = r
    test_scores['f1'] = f
    test_scores['support'] = s

    train_scores['confusion matrix'] = \
        metrics.confusion_matrix(y_train, y_pred_train, labels=[0, 1])
    test_scores['confusion matrix'] = \
        metrics.confusion_matrix(y_test, y_pred_test, labels=[0, 1])

    train_scores['auc score'] = \
        metrics.roc_auc_score(y_train, p_train + 1, average='weighted')
    test_scores['auc score'] = \
        metrics.roc_auc_score(y_test, p_test + 1, average='weighted')

    clf_scores = {'train': train_scores, 'test': test_scores}

    return clf_scores

mv_gaussian.py 文件源码项目：SourceFilterContoursMelody 作者: juanjobosch 项目源码文件源码阅读 21 收藏 0 点赞 0 评论 0

def melodiness_metrics(m_train, m_test, y_train, y_test):
    """ Compute metrics on melodiness score

    Parameters
    ----------
    m_train : np.array [n_samples]
        melodiness scores for training set
    m_test : np.array [n_samples]
        melodiness scores for testing set
    y_train : np.array [n_samples]
        Training labels.
    y_test : np.array [n_samples]
        Testing labels.

    Returns
    -------
    melodiness_scores : dict
        melodiness scores for training set
    """
    m_bin_train = 1*(m_train >= 1)
    m_bin_test = 1*(m_test >= 1)

    train_scores = {}
    test_scores = {}

    train_scores['accuracy'] = metrics.accuracy_score(y_train, m_bin_train)
    test_scores['accuracy'] = metrics.accuracy_score(y_test, m_bin_test)

    train_scores['mcc'] = metrics.matthews_corrcoef(y_train, m_bin_train)
    test_scores['mcc'] = metrics.matthews_corrcoef(y_test, m_bin_test)

    (p, r, f, s) = metrics.precision_recall_fscore_support(y_train,
                                                           m_bin_train)
    train_scores['precision'] = p
    train_scores['recall'] = r
    train_scores['f1'] = f
    train_scores['support'] = s

    (p, r, f, s) = metrics.precision_recall_fscore_support(y_test,
                                                           m_bin_test)
    test_scores['precision'] = p
    test_scores['recall'] = r
    test_scores['f1'] = f
    test_scores['support'] = s

    train_scores['confusion matrix'] = \
        metrics.confusion_matrix(y_train, m_bin_train, labels=[0, 1])
    test_scores['confusion matrix'] = \
        metrics.confusion_matrix(y_test, m_bin_test, labels=[0, 1])

    train_scores['auc score'] = \
        metrics.roc_auc_score(y_train, m_train + 1, average='weighted')
    test_scores['auc score'] = \
        metrics.roc_auc_score(y_test, m_test + 1, average='weighted')

    melodiness_scores = {'train': train_scores, 'test': test_scores}

    return melodiness_scores

modelData.py 文件源码项目：rdocChallenge 作者: Elyne 项目源码文件源码阅读 24 收藏 0 点赞 0 评论 0

def calc_and_append_scores(y_test, y_pred, metrics, featImportance):

    metrics['scores_mae'].append(mean_absolute_error(y_test, y_pred))
    _, score_off = mae(y_test, y_pred)
    metrics['scores_mae_official'].append(score_off)
    prec, rec, fmeasure, _ = precision_recall_fscore_support(y_test, y_pred, average='macro')

    metrics['scores_prec'].append(prec)
    metrics['scores_recall'].append(rec)
    metrics['scores_f1'].append(fmeasure)
    metrics['scores_accuracy'].append(accuracy_score(y_test, y_pred))
    metrics['feature_importance'].append(featImportance)


    # Getting class-individual metrics
    tTP = [0,0,0,0]
    tFP = [0,0,0,0]
    tTN = [0,0,0,0]
    tFN = [0,0,0,0]

    for act, pred in zip(y_test, y_pred):
        if act == pred:
            for i in range(0,4):
                if i == act: #add to true positive
                    tTP[i] += 1
                else: #add to true negative
                    tTN[i] += 1
        else:
            for i in range(0,4):
                if i == act: #add to false negative
                    tFN[i] += 1
                else: #add to false positive
                    tFP[i] += 1

    tpre = [0,0,0,0]
    trec = [0,0,0,0]
    tfm = [0,0,0,0]
    ttp = [0,0,0,0]
    for i in range(0,4):
        if (tTP[i] > 0.):
            tpre[i] = tTP[i] / (tTP[i] + tFP[i])
            trec[i] = tTP[i] / (tTP[i] + tFN[i])
        if ((trec[i] > 0.) | (tpre[i] > 0.)):
            tfm[i] = (2*(tpre[i] * trec[i])) / (tpre[i]+trec[i])
        ttp[i] = tTP[i]

    #for each label separately,
    # to see how well our model performs on separate labels
    metrics['indRec'].append(trec)
    metrics['indPrec'].append(tpre)
    metrics['indFmeasure'].append(tfm)
    metrics['indTP'].append(ttp)

core.py 文件源码项目：motif 作者: rabitt 项目源码文件源码阅读 20 收藏 0 点赞 0 评论 0

def score(self, y_predicted, y_target, y_prob=None):
        """ Compute metrics on classifier predictions

        Parameters
        ----------
        y_predicted : np.array [n_samples]
            Predicted class labels
        y_target : np.array [n_samples]
            Target class labels
        y_prob : np.array [n_samples] or None, default=None
            predicted probabilties. If None, auc is not computed

        Returns
        -------
        scores : dict
            dictionary of scores for the following metrics:
            accuracy, matthews correlation coefficient, precision, recall, f1,
            support, confusion matrix, auc score
        """
        labels = set(y_target)
        labels.update(y_predicted)
        is_binary = len(labels) <= 2

        scores = {}
        scores['accuracy'] = metrics.accuracy_score(y_target, y_predicted)

        if is_binary:
            scores['mcc'] = metrics.matthews_corrcoef(y_target, y_predicted)
        else:
            scores['mcc'] = None

        (scores['precision'],
         scores['recall'],
         scores['f1'],
         scores['support']) = metrics.precision_recall_fscore_support(
             y_target, y_predicted
         )

        scores['confusion matrix'] = metrics.confusion_matrix(
            y_target, y_predicted, labels=list(labels)
        )

        if y_prob is not None:
            scores['auc score'] = metrics.roc_auc_score(
                y_target, y_prob + 1, average='weighted'
            )
        else:
            scores['auc score'] = None

        return scores


###############################################################################

vwoptimize.py 文件源码项目：vwoptimize 作者: denik 项目源码文件源码阅读 30 收藏 0 点赞 0 评论 0

def classification_report(y_true, y_pred, labels=None, sample_weight=None, digits=4, threshold=None):
    # this function is copied from https://github.com/scikit-learn/scikit-learn/blob/412996f/sklearn/metrics/classification.py#L1341 (c) respective authors
    # I pulled it here to fix formatting bug.
    from sklearn.metrics import precision_recall_fscore_support, accuracy_score

    y_true = np.array(y_true)
    y_pred = np.array(y_pred)

    if labels is None:
        from sklearn.utils.multiclass import unique_labels

        if threshold is not None:
            y_true = y_true > threshold
            y_pred = y_pred > threshold

        labels = unique_labels(y_true, y_pred)
    else:
        labels = np.asarray(labels)

    last_line_heading = 'avg / total'
    target_names = ['%s' % l for l in labels]

    results = [["", "precision", "recall", "f1-score", "support", "accuracy"]]

    p, r, f1, s = precision_recall_fscore_support(y_true, y_pred,
                                                  labels=labels,
                                                  average=None,
                                                  sample_weight=sample_weight)

    for i, label in enumerate(labels):
        values = [target_names[i]]
        for v in (p[i], r[i], f1[i]):
            values += ["{0:0.{1}f}".format(v, digits)]
        values += ["{0}".format(s[i])]
        accuracy = accuracy_score(y_true == label, y_pred == label, sample_weight=sample_weight)
        values += ["{0:0.{1}f}".format(accuracy, digits)]
        results.append(values)

    values = [last_line_heading]
    for v in (np.average(p, weights=s),
              np.average(r, weights=s),
              np.average(f1, weights=s)):
        values += ["{0:0.{1}f}".format(v, digits)]
    values += ['{0}'.format(np.sum(s))]
    accuracy = accuracy_score(y_true, y_pred, sample_weight=sample_weight)
    values += ["{0:0.{1}f}".format(accuracy, digits)]
    results.append(values)

    return results

classification.py 文件源码项目：DocumentClassification 作者: bahmanh 项目源码文件源码阅读 29 收藏 0 点赞 0 评论 0

def crossValidate(document_term_matrix,labels,classifier="SVM",nfold=2):
    clf = None
    precision = []
    recall = []
    fscore = []
    if classifier == "NN":
       clf = MLPClassifier(hidden_layer_sizes=(50), activation='relu', solver='sgd', alpha=1e-2, random_state=None)   
    elif classifier == "LR":
        clf = linear_model.LogisticRegression(C=1e3)
        #clf = tree.DecisionTreeClassifier()
    if classifier == "RF":
        clf = RandomForestClassifier()
    elif classifier == "NB":
        clf = GaussianNB()
    elif classifier == "SVM":
        clf = LinearSVC()
    elif classifier == "KNN":
        clf = NearestCentroid()

    skf = StratifiedKFold(n_splits=nfold, shuffle=True)
    y_test_total = []
    y_pred_total = []

    for train_index, test_index in skf.split(document_term_matrix, labels):
        X_train, X_test = document_term_matrix[train_index], document_term_matrix[test_index]
        y_train, y_test = labels[train_index], labels[test_index]
        y_test_total.extend(y_test.tolist())
        model = clf.fit(X_train, y_train)
        y_pred = model.predict(X_test)
        y_pred_total.extend(y_pred.tolist())
        p,r,f,s = precision_recall_fscore_support(y_test, y_pred, average='weighted')
        print accuracy_score(y_test, y_pred)
        a_score.append(accuracy_score(y_test, y_pred))
        precision.append(p)
        recall.append(r)
        fscore.append(f)

    plot_learning_curve(clf, "Learning Curves", document_term_matrix, labels, ylim=None, cv=skf, n_jobs=1, train_sizes=np.linspace(.1, 1.0, 5))

    plt.savefig('lc.png')

    return pd.Series(y_test_total), pd.Series(y_pred_total), np.mean(precision),np.mean(recall),np.mean(fscore), np.mean(a_score)