Example source code for Python `metrics()`

detection.py (project: FeatureSqueezing, author: uvasrg)
import numpy as np
import sklearn.metrics
from sklearn.metrics import roc_curve, auc


def train_detector(x_train, y_train, x_val, y_val):
    fpr, tpr, thresholds = roc_curve(y_train, x_train)
    accuracy = [sklearn.metrics.accuracy_score(y_train, x_train > threshold, normalize=True, sample_weight=None) for threshold in thresholds]
    roc_auc = auc(fpr, tpr)

    idx_best = np.argmax(accuracy)
    print("Best training accuracy: %.4f, TPR(Recall): %.4f, FPR: %.4f @%.4f" % (accuracy[idx_best], tpr[idx_best], fpr[idx_best], thresholds[idx_best]))
    print("ROC_AUC: %.4f" % roc_auc)

    accuracy_val = [sklearn.metrics.accuracy_score(y_val, x_val > threshold, normalize=True, sample_weight=None) for threshold in thresholds]
    tpr_val, fpr_val = zip(*[get_tpr_fpr(y_val, x_val, threshold) for threshold in thresholds])
    # roc_auc_val = auc(fpr_val, tpr_val)
    print("Validation accuracy: %.4f, TPR(Recall): %.4f, FPR: %.4f @%.4f" % (accuracy_val[idx_best], tpr_val[idx_best], fpr_val[idx_best], thresholds[idx_best]))

    # return the threshold that maximises training accuracy
    return thresholds[idx_best], accuracy_val, fpr_val, tpr_val
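A hedged usage sketch: the detector scores are 1-D arrays and the labels binary. get_tpr_fpr lives elsewhere in FeatureSqueezing, so a plausible stand-in (an assumption, not the project's helper) is defined here to make the snippet runnable.

import numpy as np


def get_tpr_fpr(y_true, scores, threshold):
    # plausible stand-in for the project's helper (assumption)
    pred = scores > threshold
    tpr = np.sum(pred & (y_true == 1)) / float(np.sum(y_true == 1))
    fpr = np.sum(pred & (y_true == 0)) / float(np.sum(y_true == 0))
    return tpr, fpr


rng = np.random.RandomState(0)
x_train = np.concatenate([rng.normal(0, 1, 100), rng.normal(2, 1, 100)])  # synthetic scores
y_train = np.concatenate([np.zeros(100), np.ones(100)])
x_val = np.concatenate([rng.normal(0, 1, 50), rng.normal(2, 1, 50)])
y_val = np.concatenate([np.zeros(50), np.ones(50)])

best_threshold, acc_val, fpr_val, tpr_val = train_detector(x_train, y_train, x_val, y_val)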
blstm.py (project: semeval2017-scienceie, author: UKPLab)
# Keras 1.x imports; AttentionLSTM is the project's custom layer (defined elsewhere)
from keras import backend as keras_backend
from keras.layers import Convolution1D, Dense, Dropout, Embedding, Input, Lambda, merge
from keras.models import Model


def build_lstm(output_dim, embeddings):

    loss_function = "categorical_crossentropy"

    # this is the placeholder tensor for the input sequences
    sequence = Input(shape=(MAX_SEQUENCE_LENGTH,), dtype="int32")

    # this embedding layer will transform the sequences of integers
    embedded = Embedding(embeddings.shape[0], embeddings.shape[1], input_length=MAX_SEQUENCE_LENGTH, weights=[embeddings], trainable=True)(sequence)

    # 4 convolution layers (each with 1000 filters)
    convolutions = [Convolution1D(filter_length=filters, nb_filter=1000, border_mode="same") for filters in [2, 3, 5, 7]]
    # concatenate the four feature maps
    merged_cnn = merge([conv(embedded) for conv in convolutions], mode="concat")
    # create attention vector from max-pooled convoluted
    maxpool = Lambda(lambda x: keras_backend.max(x, axis=1, keepdims=False), output_shape=lambda x: (x[0], x[2]))
    attention_vector = maxpool(merged_cnn)

    forwards = AttentionLSTM(64, attention_vector)(embedded)
    backwards = AttentionLSTM(64, attention_vector, go_backwards=True)(embedded)

    # concatenate the outputs of the 2 LSTM layers
    bi_lstm = merge([forwards, backwards], mode="concat", concat_axis=-1)

    after_dropout = Dropout(0.5)(bi_lstm)

    # softmax output layer
    output = Dense(output_dim=output_dim, activation="softmax")(after_dropout)

    # the complete model
    model = Model(input=sequence, output=output)

    # try using different optimizers and different optimizer configs
    model.compile("adagrad", loss_function, metrics=["accuracy"])

    return model
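A hypothetical invocation, assuming the snippet's Keras 1.x imports are in place and the project's custom AttentionLSTM layer plus the module-level MAX_SEQUENCE_LENGTH constant are available:

import numpy as np

embeddings = np.random.rand(5000, 100).astype("float32")  # made-up vocabulary x dimension matrix
model = build_lstm(output_dim=3, embeddings=embeddings)
model.summary()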
sklearn_backend.py (project: python-alp, author: tboquet)
def to_dict_w_opt(model, metrics=None):
    """Serializes a sklearn model. Saves the parameters,
        not the attributes.

    Args:
        model(sklearn.BaseEstimator): the model to serialize,
            must be in SUPPORTED
        metrics(list, optional): a list of metrics to monitor

    Returns:
        a dictionary of the serialized model
    """

    config = dict()
    typestring = str(type(model))[8:][:-2]
    config['config'] = typestring

    attr = model.__dict__

    for k, v in attr.items():
        # check if parameter or attribute
        if k[-1:] == '_':
            # do not store attributes
            pass
        else:
            config[k] = typeconversion(v)

    # to be discussed:
    # we add the metrics to the config even if it doesn't
    # make sense for a sklearn model;
    # the metrics are then caught in model_from_dict_w_opt
    if metrics is not None:
        config['metrics'] = []
        for m in metrics:
            config['metrics'].append(m)

    return config
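The trailing-underscore check above follows sklearn's convention that fitted attributes end in `_` while constructor parameters do not. A minimal standalone sketch of that filter (it skips the module's typeconversion helper):

import numpy as np
from sklearn.linear_model import LogisticRegression

model = LogisticRegression(C=0.5).fit(np.eye(2), [0, 1])
params = {k: v for k, v in model.__dict__.items() if not k.endswith('_')}
print(sorted(params))  # hyperparameters such as 'C'; fitted attributes such as 'coef_' are skipped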
sklearn_backend.py (project: python-alp, author: tboquet)
def model_from_dict_w_opt(model_dict, custom_objects=None):
    """Builds a sklearn model from a serialized model using `to_dict_w_opt`

    Args:
        model_dict(dict): a serialized sklearn model
        custom_objects(dict, optional): a dictionary mapping custom object
            names to custom objects (callables, etc.)

    Returns:
        A new sklearn.BaseEstimator (in SUPPORTED) instance. The attributes
        are not loaded.

    """
    if custom_objects is None:
        custom_objects = dict()

    # custom_objects = {k: deserialize(k, custom_objects[k])
    #                   for k in custom_objects}

    # safety check
    if model_dict['config'] not in keyval:
        raise NotImplementedError("sklearn model not supported.")

    # load the metrics
    if 'metrics' in model_dict:
        metrics = model_dict.pop('metrics')
    else:
        metrics = None

    # create a new instance of the appropriate model type
    model = copy.deepcopy(keyval[model_dict['config']])

    # load the parameters
    for k, v in model_dict.items():
        if isinstance(v, list):  # pragma: no cover
            setattr(model, k, np.array(v))
        else:
            setattr(model, k, v)

    return model, metrics
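A minimal sketch of the registry pattern both functions rely on; the keyval mapping below is hypothetical (the real one is built from SUPPORTED elsewhere in python-alp):

import copy

from sklearn.linear_model import LinearRegression

# hypothetical registry of typestring -> template estimator
keyval = {'sklearn.linear_model.base.LinearRegression': LinearRegression()}

model_dict = {'config': 'sklearn.linear_model.base.LinearRegression', 'fit_intercept': False}
model = copy.deepcopy(keyval[model_dict['config']])
for k, v in model_dict.items():
    if k != 'config':
        setattr(model, k, v)
print(model.fit_intercept)  # False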
seizure_modeling.py (project: kaggle-seizure-prediction, author: sics-lm)
def select_model(training_data, method='logistic',
                 do_segment_split=True,
                 processes=1,
                 cv_verbosity=2,
                 model_params=None,
                 random_state=None):
    """
    Fits a model given by *method* to the training data.
    :param training_data: The training data to fit the model with
    :param method: A string which specifies the model to use.
    :param do_segment_split: If True, the training data will be split by segment.
    :param processes: The number of processes to use for the grid search.
    :param cv_verbosity: The verbosity level of the grid search. 0 is silent, 2 is maximum verbosity.
    :param model_params: An optional dictionary with keyword arguments to tune the grid search.
    :param random_state: A constant which will seed the random number generator if given.
    :return: The fitted grid search object.
    """

    logging.info("Training a {} model".format(method))

    training_data_x = training_data.drop('Preictal', axis=1)
    training_data_y = training_data['Preictal']

    cv = get_cv_generator(training_data, do_segment_split=do_segment_split, random_state=random_state)

    scorer = sklearn.metrics.make_scorer(sklearn.metrics.roc_auc_score, average='weighted')
    model_dict = get_model(method,
                           training_data_x,
                           training_data_y,
                           model_params=model_params,
                           random_state=random_state)
    common_cv_kwargs = dict(cv=cv,
                            scoring=scorer,
                            n_jobs=processes,
                            pre_dispatch='2*n_jobs',
                            refit=True,
                            verbose=cv_verbosity,
                            iid=False)

    cv_kwargs = dict(common_cv_kwargs)
    cv_kwargs.update(model_dict)

    logging.info("Running grid search using the parameters: {}".format(model_dict))
    clf = GridSearchCV(**cv_kwargs)
    clf.fit(training_data_x, training_data_y)

    return clf
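A stripped-down sketch of the same make_scorer/GridSearchCV pattern on synthetic data; the parameter grid here is hypothetical (the project's get_model builds the real one):

import sklearn.metrics
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV

X, y = make_classification(n_samples=60, random_state=0)
scorer = sklearn.metrics.make_scorer(sklearn.metrics.roc_auc_score, average='weighted')
clf = GridSearchCV(LogisticRegression(), {'C': [0.1, 1.0, 10.0]},
                   scoring=scorer, cv=3, refit=True)
clf.fit(X, y)
print(clf.best_params_)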
vwoptimize.py (project: vwoptimize, author: denik)
import numpy as np


def classification_report(y_true, y_pred, labels=None, sample_weight=None, digits=4, threshold=None):
    # this function is copied from https://github.com/scikit-learn/scikit-learn/blob/412996f/sklearn/metrics/classification.py#L1341 (c) respective authors
    # I pulled it here to fix a formatting bug.
    from sklearn.metrics import precision_recall_fscore_support, accuracy_score

    y_true = np.array(y_true)
    y_pred = np.array(y_pred)

    if labels is None:
        from sklearn.utils.multiclass import unique_labels

        if threshold is not None:
            y_true = y_true > threshold
            y_pred = y_pred > threshold

        labels = unique_labels(y_true, y_pred)
    else:
        labels = np.asarray(labels)

    last_line_heading = 'avg / total'
    target_names = ['%s' % l for l in labels]

    results = [["", "precision", "recall", "f1-score", "support", "accuracy"]]

    p, r, f1, s = precision_recall_fscore_support(y_true, y_pred,
                                                  labels=labels,
                                                  average=None,
                                                  sample_weight=sample_weight)

    for i, label in enumerate(labels):
        values = [target_names[i]]
        for v in (p[i], r[i], f1[i]):
            values += ["{0:0.{1}f}".format(v, digits)]
        values += ["{0}".format(s[i])]
        accuracy = accuracy_score(y_true == label, y_pred == label, sample_weight=sample_weight)
        values += ["{0:0.{1}f}".format(accuracy, digits)]
        results.append(values)

    values = [last_line_heading]
    for v in (np.average(p, weights=s),
              np.average(r, weights=s),
              np.average(f1, weights=s)):
        values += ["{0:0.{1}f}".format(v, digits)]
    values += ['{0}'.format(np.sum(s))]
    accuracy = accuracy_score(y_true, y_pred, sample_weight=sample_weight)
    values += ["{0:0.{1}f}".format(accuracy, digits)]
    results.append(values)

    return results
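Hypothetical usage, rendering the nested list the function returns:

rows = classification_report([0, 1, 1, 0, 1], [0, 1, 0, 0, 1])
for row in rows:
    print('  '.join('{:>9}'.format(cell) for cell in row))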
model_eval.py (project: healthcareai-py, author: HealthCatalyst)
def roc_plot_from_thresholds(roc_thresholds_by_model, save=False, debug=False):
    """
    From a given dictionary of thresholds by model, create a ROC curve for each model.

    Args:
        roc_thresholds_by_model (dict): A dictionary of ROC thresholds by model name.
        save (bool): False to display the image (default) or True to save it (but not display it)
        debug (bool): verbose output.
    """
    # TODO consolidate this and PR plotter into 1 function
    # TODO make the colors randomly generated from rgb values
    # Cycle through the colors list
    color_iterator = itertools.cycle(['b', 'g', 'r', 'c', 'm', 'y', 'k'])
    # Initialize plot
    plt.figure()
    plt.xlabel('False Positive Rate (FPR)')
    plt.ylabel('True Positive Rate (TPR)')
    plt.title('Receiver Operating Characteristic (ROC)')
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.plot([0, 1], [0, 1], linestyle=DIAGONAL_LINE_STYLE, color=DIAGONAL_LINE_COLOR)

    # Calculate and plot for each model
    for color, (model_name, metrics) in zip(color_iterator, roc_thresholds_by_model.items()):
        # Extract model name and metrics from dictionary
        roc_auc = metrics['roc_auc']
        tpr = metrics['true_positive_rates']
        fpr = metrics['false_positive_rates']
        best_true_positive_rate = metrics['best_true_positive_rate']
        best_false_positive_rate = metrics['best_false_positive_rate']

        if debug:
            print('{} model:'.format(model_name))
            print(pd.DataFrame({'FPR': fpr, 'TPR': tpr}))

        # plot the line
        label = '{} (ROC AUC = {})'.format(model_name, round(roc_auc, 2))
        plt.plot(fpr, tpr, color=color, label=label)
        plt.plot([best_false_positive_rate], [best_true_positive_rate], marker='*', markersize=10, color=color)

    plt.legend(loc="lower right")

    if save:
        plt.savefig('ROC.png')
        source_path = os.path.dirname(os.path.abspath(__file__))
        print('\nROC plot saved in: {}'.format(source_path))

    plt.show()
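Hypothetical usage, assuming the snippet's module imports (matplotlib, pandas, itertools, os) are in place: assemble the expected dictionary from sklearn's roc_curve, picking the "best" point by Youden's J statistic (an assumption; healthcareai computes it upstream). The two module constants are stubbed:

import numpy as np
from sklearn.metrics import auc, roc_curve

DIAGONAL_LINE_STYLE, DIAGONAL_LINE_COLOR = '--', 'grey'  # stand-ins for the module constants

y_true = np.array([0, 0, 1, 1, 0, 1])
y_score = np.array([0.1, 0.4, 0.35, 0.8, 0.2, 0.7])
fpr, tpr, _ = roc_curve(y_true, y_score)
best = np.argmax(tpr - fpr)  # Youden's J (assumption)
roc_thresholds_by_model = {'example_model': {
    'roc_auc': auc(fpr, tpr),
    'true_positive_rates': tpr,
    'false_positive_rates': fpr,
    'best_true_positive_rate': tpr[best],
    'best_false_positive_rate': fpr[best],
}}
roc_plot_from_thresholds(roc_thresholds_by_model)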
model_eval.py (project: healthcareai-py, author: HealthCatalyst)
def pr_plot_from_thresholds(pr_thresholds_by_model, save=False, debug=False):
    """
    From a given dictionary of thresholds by model, create a PR curve for each model.

    Args:
        pr_thresholds_by_model (dict): A dictionary of PR thresholds by model name.
        save (bool): False to display the image (default) or True to save it (but not display it)
        debug (bool): verbose output.
    """
    # TODO consolidate this and PR plotter into 1 function
    # TODO make the colors randomly generated from rgb values
    # Cycle through the colors list
    color_iterator = itertools.cycle(['b', 'g', 'r', 'c', 'm', 'y', 'k'])
    # Initialize plot
    plt.figure()
    plt.xlabel('Recall')
    plt.ylabel('Precision')
    plt.title('Precision Recall (PR)')
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.plot([0, 1], [1, 0], linestyle=DIAGONAL_LINE_STYLE, color=DIAGONAL_LINE_COLOR)

    # Calculate and plot for each model
    for color, (model_name, metrics) in zip(color_iterator, pr_thresholds_by_model.items()):
        # Extract model name and metrics from dictionary
        pr_auc = metrics['pr_auc']
        precision = metrics['precisions']
        recall = metrics['recalls']
        best_recall = metrics['best_recall']
        best_precision = metrics['best_precision']

        if debug:
            print('{} model:'.format(model_name))
            print(pd.DataFrame({'Recall': recall, 'Precision': precision}))

        # plot the line
        label = '{} (PR AUC = {})'.format(model_name, round(pr_auc, 2))
        plt.plot(recall, precision, color=color, label=label)
        plt.plot([best_recall], [best_precision], marker='*', markersize=10, color=color)

    plt.legend(loc="lower left")

    if save:
        plt.savefig('PR.png')
        source_path = os.path.dirname(os.path.abspath(__file__))
        print('\nPR plot saved in: {}'.format(source_path))

    plt.show()
machine_learning.py (project: -Python-Analysis_of_wine_quality, author: ekolik)
def decis_tree(wine_set):
    # remember whether wine_set is the red or the white data set
    w = wine_set

    # subset data for better tree visibility
    # wine_set = wine_set[:100]

    # recode quality (response variable) into 2 groups: 0:{3,4,5}, 1:{6,7,8,9}
    recode = {3: 0, 4: 0, 5: 0, 6: 1, 7: 1, 8: 1, 9: 1}
    wine_set['quality_c'] = wine_set['quality'].map(recode)

    # round explanatory data for easier tree
    # wine_set["residual_sugar"] = wine_set["residual_sugar"].round()
    # wine_set["alcohol"] = wine_set["alcohol"].round()

    # split into training and testing sets
    predictors = wine_set[["residual_sugar", 'alcohol']]
    targets = wine_set.quality_c

    pred_train, pred_test, tar_train, tar_test = train_test_split(predictors, targets, test_size=.4)

    # build model on training data
    classifier = DecisionTreeClassifier()
    classifier = classifier.fit(pred_train, tar_train)

    predictions = classifier.predict(pred_test)

    # print the confusion matrix and accuracy of the model
    print(sklearn.metrics.confusion_matrix(tar_test, predictions))
    print(sklearn.metrics.accuracy_score(tar_test, predictions))

    # export the tree for viewing
    if w.equals(red):
        export_graphviz(classifier, out_file="red_decision_tree.dot")
    else:
        export_graphviz(classifier, out_file="white_decision_tree.dot")
    # to view the decision tree create a .pdf file from the created .dot file
    # by typing in the terminal from this directory: dot -Tpdf decision_tree.dot -o decision_tree.pdf
# print('----------------Decision Tree------------------------')
# call(decis_tree)


# ____________________________________Random Forests________________
machine_learning.py (project: -Python-Analysis_of_wine_quality, author: ekolik)
def random_forests(wine_set):
    # recode quality (response variable) into 2 groups: 0:{3,4,5}, 1:{6,7,8,9}
    recode = {3: 0, 4: 0, 5: 0, 6: 1, 7: 1, 8: 1, 9: 1}
    wine_set['quality_c'] = wine_set['quality'].map(recode)

    # split into training and testing sets
    predictors = wine_set[["density", 'alcohol', 'sulphates', 'pH', 'volatile_acidity', 'chlorides', 'fixed_acidity',
                           'citric_acid', 'residual_sugar', 'free_sulfur_dioxide', 'total_sulfur_dioxide']]

    targets = wine_set.quality_c

    pred_train, pred_test, tar_train, tar_test = train_test_split(predictors, targets, test_size=.4)

    # build model on training data#
    classifier = RandomForestClassifier(n_estimators=25)
    classifier = classifier.fit(pred_train, tar_train)

    predictions = classifier.predict(pred_test)
    # print the confusion matrix and accuracy of the model
    print('confusion matrix:\n', sklearn.metrics.confusion_matrix(tar_test, predictions))
    print('\naccuracy:', sklearn.metrics.accuracy_score(tar_test, predictions))

    # to display the relative importance of each predictive variable
    model = ExtraTreesClassifier()
    model.fit(pred_train, tar_train)

    print('importance of predictors:')
    dct = dict()
    for c in range(len(predictors.columns)):
        dct[predictors.columns[c]] = model.feature_importances_[c]
    print(sorted(dct.items(), key=operator.itemgetter(1), reverse=True))

    # run different numbers of trees to see the effect of the number on the accuracy of the prediction
    n = 100
    accuracy = [0]*n

    for i in range(n):
        classifier = RandomForestClassifier(n_estimators=i+1)
        classifier = classifier.fit(pred_train, tar_train)
        predictions = classifier.predict(pred_test)
        accuracy[i] = sklearn.metrics.accuracy_score(tar_test, predictions)

    plt.plot(range(1, n+1), accuracy)
    plt.xlabel("Number of trees")
    plt.ylabel("Accuracy of prediction")
    plt.title("Effect of the number of trees on the prediction accuracy")
    plt.show()

    print(accuracy)

# print('----------------Random Forests------------------------')
# call(random_forests)
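The retraining loop above fits each forest from scratch; a hedged alternative sketch using warm_start grows a single forest incrementally instead (synthetic data stands in for the wine predictors):

import sklearn.metrics
from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split

X, y = make_classification(n_samples=400, random_state=0)
X_tr, X_te, y_tr, y_te = train_test_split(X, y, test_size=.4, random_state=0)

clf = RandomForestClassifier(warm_start=True, n_estimators=1, random_state=0)
accuracy = []
for n in range(1, 101):
    clf.set_params(n_estimators=n)
    clf.fit(X_tr, y_tr)  # warm_start adds trees rather than refitting all of them
    accuracy.append(sklearn.metrics.accuracy_score(y_te, clf.predict(X_te)))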


# ________________________________Lasso Regression__________________________________
task4_audio_tagging.py (project: dcase2016_task4, author: pafoster)
def do_system_evaluation(dataset, dataset_evaluation_mode, result_path):

    # Turn warnings off; sklearn metrics will trigger a warning for classes with no
    # predicted samples in F1 scoring. This is just to keep the printout clean.
    #warnings.simplefilter("ignore")

    fold_wise_class_eer = numpy.zeros((len(dataset.folds(mode=dataset_evaluation_mode)), dataset.audio_tag_count))

    for fold in dataset.folds(mode=dataset_evaluation_mode):
        class_wise_eer       = numpy.zeros((dataset.audio_tag_count))
        results = []
        result_filename = get_result_filename(fold=fold, path=result_path)
        if os.path.isfile(result_filename):
            with open(result_filename, 'rt') as f:
                for row in csv.reader(f, delimiter=','):
                    results.append(row)
        else:
            raise IOError("Result file not found [%s]" % result_filename)

        for tag_id,tag in enumerate(dataset.audio_tags):

            y_true_binary = []
            y_true_file = []
            y_score = []
            for result in results:
                if tag == result[1]:
                    relative_path = dataset.package_list[0]['local_audio_path'].replace(dataset.local_path,'')[1:] + os.path.sep + result[0]
                    y_true_file.append(result[0])
                    if tag in dataset.file_meta(relative_path)[0]['tags']:
                        y_true_binary.append(1)
                    else:
                        y_true_binary.append(0)

                    y_score.append(float(result[2]))

            if numpy.any(y_true_binary):
                class_wise_eer[tag_id] = compute_eer(result_filename, tag, dict(zip(y_true_file, y_true_binary)))
            else:
                class_wise_eer[tag_id] = None

        fold_wise_class_eer[fold - 1 if fold > 0 else fold, :] = class_wise_eer

    print "  File-wise evaluation, over %d folds" % (dataset.fold_count)

    print "     {:20s} | {:8s}".format('Tag', 'EER')
    print "     ==============================================="
    labels = numpy.array([dataset.tagcode_to_taglabel(t) for t in dataset.audio_tags])
    for i in numpy.argsort(labels):
        print "     {:20s} | {:3.3f} ".format(labels[i],
                                                                    numpy.nanmean(fold_wise_class_eer[:,i])
                                                                    )
    print "     ==============================================="
    print "     {:20s} | {:3.3f} ".format('Mean error',
                                                      numpy.mean(numpy.nanmean(fold_wise_class_eer))
                                                      )
    # Restore warnings to default settings
    warnings.simplefilter("default")
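compute_eer is defined elsewhere in the project; a minimal standalone sketch of the usual equal-error-rate calculation (an approximation, not the project's implementation):

import numpy as np
from sklearn.metrics import roc_curve

def eer_from_scores(y_true, y_score):
    # EER: the operating point where false-positive and false-negative rates meet
    fpr, tpr, _ = roc_curve(y_true, y_score)
    fnr = 1.0 - tpr
    idx = np.nanargmin(np.abs(fnr - fpr))
    return (fpr[idx] + fnr[idx]) / 2.0

print(eer_from_scores([0, 0, 1, 1], [0.1, 0.6, 0.4, 0.9]))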
random_forest.py (project: SinaWeiboSpider, author: SuperSaiyanSSS)
def rand_forest_train(self):
    # load the labelled user data
    users = pd.read_csv('names.csv')
    # use the similarity, platform, reputation and entropy columns as features
    X = users[['similarity', 'platform', 'reputation', 'entropy']]
    y = users['human_or_machine']

    # hold out 25% of the data as a test set
    from sklearn.cross_validation import train_test_split
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=33)

    # vectorize the feature dictionaries
    from sklearn.feature_extraction import DictVectorizer
    vec = DictVectorizer(sparse=False)
    X_train = vec.fit_transform(X_train.to_dict(orient='records'))
    X_test = vec.transform(X_test.to_dict(orient='records'))

    # train a single decision tree and predict on the test set
    from sklearn.tree import DecisionTreeClassifier
    dtc = DecisionTreeClassifier()
    dtc.fit(X_train, y_train)
    dtc_y_pred = dtc.predict(X_test)

    # train a random forest and predict on the test set
    from sklearn.ensemble import RandomForestClassifier
    rfc = RandomForestClassifier()
    rfc.fit(X_train, y_train)
    rfc_y_pred = rfc.predict(X_test)

    # train a gradient boosting classifier and predict on the test set
    from sklearn.ensemble import GradientBoostingClassifier
    gbc = GradientBoostingClassifier()
    gbc.fit(X_train, y_train)
    gbc_y_pred = gbc.predict(X_test)

    from sklearn.metrics import classification_report
    # accuracy of the decision tree, plus per-class precision, recall and F1
    print("Decision tree accuracy:", dtc.score(X_test, y_test))
    print(classification_report(dtc_y_pred, y_test))

    # accuracy of the random forest, plus per-class precision, recall and F1
    print("Random forest accuracy:", rfc.score(X_test, y_test))
    print(classification_report(rfc_y_pred, y_test))

    # accuracy of the gradient boosting classifier, plus per-class precision, recall and F1
    print("Gradient boosting accuracy:", gbc.score(X_test, y_test))
    print(classification_report(gbc_y_pred, y_test))


    users = pd.read_csv('values.csv')

    # vectorize the unlabelled users and predict with the random forest
    X = users[['similarity', 'platform', 'reputation', 'entropy']]
    X = vec.transform(X.to_dict(orient='records'))
    print(rfc.predict(X))

    self.dtc = dtc
    self.rfc = rfc
    self.gbc = gbc
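A small standalone illustration of what DictVectorizer does to the row dictionaries above (made-up rows: numeric values pass through, strings are one-hot encoded):

from sklearn.feature_extraction import DictVectorizer

rows = [{'platform': 'web', 'reputation': 10.0},
        {'platform': 'mobile', 'reputation': 3.5}]
vec = DictVectorizer(sparse=False)
print(vec.fit_transform(rows))  # [[0. 1. 10.], [1. 0. 3.5]]
print(vec.feature_names_)       # ['platform=mobile', 'platform=web', 'reputation']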

