Example source code for Python's log_loss()
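
Most of the snippets below call sklearn.metrics.log_loss, which returns the mean negative log-likelihood of the true labels under the predicted probabilities. As a reference point, here is a minimal, self-contained sketch of the basic call on made-up toy arrays (binary and multiclass):

import numpy as np
from sklearn.metrics import log_loss

# Binary case: y_pred holds P(class 1) for each sample.
y_true = np.array([1, 0, 1, 1])
y_pred = np.array([0.9, 0.2, 0.7, 0.6])
print(log_loss(y_true, y_pred))                      # ~0.299

# Multiclass case: one row of class probabilities per sample, rows summing to 1.
y_true_mc = [0, 2, 1]
proba = np.array([[0.7, 0.2, 0.1],
                  [0.1, 0.3, 0.6],
                  [0.2, 0.5, 0.3]])
print(log_loss(y_true_mc, proba, labels=[0, 1, 2]))  # ~0.520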

Source: predict_2017_06_16_3.py (project: mlbootcamp_5, author: ivan-filonov)
def rf1(train2, y, test2, v, z):
    cname = sys._getframe().f_code.co_name
    v[cname], z[cname] = 0, 0
    N_splits = 300
    scores = []
    skf = model_selection.StratifiedKFold(n_splits=N_splits, shuffle=True)
    for n, (itrain, ival) in enumerate(skf.split(train2, y)):
        print('step %d of %d'%(n+1, skf.n_splits), now())
        clf = ensemble.RandomForestRegressor(n_estimators=1000,
                                             max_depth=3,
                                             random_state=13)
        clf.fit(train2[itrain], y[itrain])

        p = clf.predict(train2[ival])
        v.loc[ival, cname] += p
        score = metrics.log_loss(y[ival], p)
        z[cname]  += np.log1p(clf.predict(test2))
        print(cname, 'step %d: score'%(n+1), score, now())
        scores.append(score)

    print('validation loss: ', metrics.log_loss(y, v[cname]))
    cv=np.array(scores)
    print(cv, cv.mean(), cv.std())
    z[cname] /= N_splits
Source: test.py (project: stacked_generalization, author: fukatani)
def test_stacked_classfier_extkfold(self):
        bclf = LogisticRegression(random_state=1)
        clfs = [RandomForestClassifier(n_estimators=40, criterion = 'gini', random_state=1),
                RidgeClassifier(random_state=1),
                ]
        sl = StackedClassifier(bclf,
                               clfs,
                               n_folds=3,
                               verbose=0,
                               Kfold=StratifiedKFold(self.iris.target, 3),
                               stack_by_proba=False,
                               oob_score_flag=True,
                               oob_metrics=log_loss)
        sl.fit(self.iris.data, self.iris.target)
        score = sl.score(self.iris.data, self.iris.target)
        self.assertGreater(score, 0.9, "Failed with score = {0}".format(score))
Source: ensemble.py (project: kaggle_airbnb, author: svegapons)
def opt_2_obj_func(w, X, y, n_class):
    """
    Function to be minimized in the EN_OPT_2 ensembler.
    In this case there is only one weight for each classification result to be
    combined.
    Parameters:
    ----------
    w: ndarray size=(n_preds)
       Candidate solution to the optimization problem (vector of weights).
    X: ndarray size=(n_samples, n_preds * n_class)
       Solutions to be combined horizontally concatenated.
    y: ndarray size=(n_samples,)
       Class labels
    n_class: int
       Number of classes in the problem (12 in this case).
    """
    w = np.abs(w)
    sol = np.zeros((X.shape[0], n_class))
    for i in range(len(w)):
        sol += X[:, i*n_class:(i+1)*n_class] * w[i]
    #Minimizing the logloss   
    sc_ll = log_loss(y, sol)
    return sc_ll
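
The snippet above only defines the objective; finding the weights still requires an optimizer. Below is a hedged sketch of one way to do that with scipy.optimize.minimize, on synthetic data. The toy X/y, the Nelder-Mead choice, and the normalizing wrapper are illustrative assumptions, not the project's actual EN_OPT_2 driver; the wrapper keeps the blended probability rows summing to one, which recent scikit-learn releases expect, and it assumes opt_2_obj_func (and the sklearn log_loss it calls) is importable.

import numpy as np
from scipy.optimize import minimize

n_preds, n_class = 3, 12
y = np.tile(np.arange(n_class), 20)                  # 240 toy labels covering every class
rng = np.random.RandomState(0)
# Three fake probability matrices, concatenated column-wise as the docstring describes.
X = np.hstack([rng.dirichlet(np.ones(n_class), size=y.size) for _ in range(n_preds)])

def normalized_obj(w, X, y, n_class):
    # Keep the blended rows summing to one before delegating to the objective above.
    w = np.abs(w)
    return opt_2_obj_func(w / max(w.sum(), 1e-12), X, y, n_class)

w0 = np.ones(n_preds) / n_preds                      # start from equal weights
res = minimize(normalized_obj, w0, args=(X, y, n_class), method='Nelder-Mead')
best_w = np.abs(res.x) / np.abs(res.x).sum()         # final blending weights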
Source: test_submission.py (project: KaggleExeter, author: detomo)
def cross_validate(train):
    #separate training and validation set
    X_train,X_valid= split_train_validation(train)
    scores = []; preds = []
    for i in xrange(len(X_train)):
        #convert X_train, Y_train etc... to xgboost matrix
        dtrain = xgb.DMatrix(X_train[i][['phone_brand','device_model','timestamp']], label = X_train[i]['group'],missing=np.nan) 
        dvalid = xgb.DMatrix(X_valid[i][['phone_brand','device_model','timestamp']], label = X_valid[i]['group'],missing=np.nan)

        #predict with xgboost
        parameters = {'max_depth':4,'eta':0.1,'silent':1, 'subsample':0.8,'colsample_bytree':0.8,
                'objective':'multi:softprob','booster':'gbtree','early_stopping_rounds':50,
                'num_class':12,'num_boost_round':1000,'eval_metric':'mlogloss'}
        plst = parameters.items()
        bst = xgb.train(plst, dtrain)
        pred = bst.predict(dvalid)

        scores.append(log_loss(X_valid[i]['group'].tolist(),pred))
        pred = pd.DataFrame(pred, index = X_valid[i].index, columns=target_encoder.classes_)
        preds.append(pred)
    return scores, preds
Source: B_4_find_FR_with_for.py (project: tencent_social_algo, author: Folieshell)
def check_log_loss(max_depth, n_splits, test_size):
    model = RandomForestClassifier(max_depth=max_depth, n_jobs=-1, random_state=777)
    trn_scores = []
    vld_scores = []
    sss = StratifiedShuffleSplit(n_splits=n_splits, test_size=test_size, random_state=777)
    for i, (t_ind, v_ind) in enumerate(sss.split(feature_train, trainY)):
        print('# Iter {} / {}'.format(i + 1, n_splits))
        x_trn = feature_train.values[t_ind]
        y_trn = trainY[t_ind]
        x_vld = feature_train.values[v_ind]
        y_vld = trainY[v_ind]

        model.fit(x_trn, y_trn)

        score = log_loss(y_trn, model.predict_proba(x_trn))
        trn_scores.append(score)

        score = log_loss(y_vld, model.predict_proba(x_vld))
        vld_scores.append(score)

    print("max_depth: %d   n_splits: %d    test_size: %f" % (max_depth, n_splits, test_size))
    print('# TRN logloss: {}'.format(np.mean(trn_scores)))
    print('# VLD logloss: {}'.format(np.mean(vld_scores)))
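
check_log_loss only prints its train/validation scores, so it is typically driven by a small manual sweep. A sketch of such a sweep follows; the depth grid is illustrative, and feature_train / trainY are assumed to be the same module-level objects the function already references.

# Illustrative hyper-parameter sweep over tree depth.
for depth in (4, 6, 8, 10):
    check_log_loss(max_depth=depth, n_splits=5, test_size=0.2)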
Source: bayesian_encode_fivelevel_withint_v2.py (project: bnp, author: mpearmain)
def runET(train_X, train_y, test_X, test_y=None, validation=1, n_est_val=50, depth_val=None, split_val=2, leaf_val=1, feat_val='auto', jobs_val=4, random_state_val=0):
        clf = ensemble.ExtraTreesClassifier(
                n_estimators = n_est_val,
                max_depth = depth_val,
                min_samples_split = split_val,
                min_samples_leaf = leaf_val,
                max_features = feat_val,
                criterion='entropy',
                n_jobs = jobs_val,
                random_state = random_state_val)
        clf.fit(train_X, train_y)
        pred_train_y = clf.predict_proba(train_X)[:,1]
        pred_test_y = clf.predict_proba(test_X)[:,1]

        if validation:
                train_loss = log_loss(train_y, pred_train_y)
                loss = log_loss(test_y, pred_test_y)
                print "Train, Test loss : ", train_loss, loss
                return pred_test_y, loss
        else:
                return pred_test_y
Source: bayesian_encode_fourlevel_withint.py (project: bnp, author: mpearmain)
def runET(train_X, train_y, test_X, test_y=None, validation=1, n_est_val=50, depth_val=None, split_val=2, leaf_val=1, feat_val='auto', jobs_val=4, random_state_val=0):
        clf = ensemble.ExtraTreesClassifier(
                n_estimators = n_est_val,
                max_depth = depth_val,
                min_samples_split = split_val,
                min_samples_leaf = leaf_val,
                max_features = feat_val,
                criterion='entropy',
                n_jobs = jobs_val,
                random_state = random_state_val)
        clf.fit(train_X, train_y)
        pred_train_y = clf.predict_proba(train_X)[:,1]
        pred_test_y = clf.predict_proba(test_X)[:,1]

        if validation:
                train_loss = log_loss(train_y, pred_train_y)
                loss = log_loss(test_y, pred_test_y)
                print "Train, Test loss : ", train_loss, loss
                return pred_test_y, loss
        else:
                return pred_test_y
Source: bayesian_encode_fivelevel_withint.py (project: bnp, author: mpearmain)
def runET(train_X, train_y, test_X, test_y=None, validation=1, n_est_val=50, depth_val=None, split_val=2, leaf_val=1, feat_val='auto', jobs_val=4, random_state_val=0):
        clf = ensemble.ExtraTreesClassifier(
                n_estimators = n_est_val,
                max_depth = depth_val,
                min_samples_split = split_val,
                min_samples_leaf = leaf_val,
                max_features = feat_val,
                criterion='entropy',
                n_jobs = jobs_val,
                random_state = random_state_val)
        clf.fit(train_X, train_y)
        pred_train_y = clf.predict_proba(train_X)[:,1]
        pred_test_y = clf.predict_proba(test_X)[:,1]

        if validation:
                train_loss = log_loss(train_y, pred_train_y)
                loss = log_loss(test_y, pred_test_y)
                print "Train, Test loss : ", train_loss, loss
                return pred_test_y, loss
        else:
                return pred_test_y
Source: extratrees_autotune.py (project: bnp, author: mpearmain)
def extratreescv(n_estimators,
                 min_samples_split,
                 min_samples_leaf,
                 max_features,
                 max_depth,
                 min_weight_fraction_leaf
                 ):

    clf = ExtraTreesClassifier(n_estimators=int(n_estimators),
                               min_samples_split=int(min_samples_split),
                               min_samples_leaf=int(min_samples_leaf),
                               max_features= int(max_features),
                               max_depth = int(max_depth),
                               min_weight_fraction_leaf = min_weight_fraction_leaf,
                               n_jobs=-1,
                               random_state=1234,
                               verbose=1)

    clf.fit(x0, y0)
    ll = -log_loss(y1, clf.predict_proba(x1)[:,1])
    return ll
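
extratreescv returns the negative validation log loss so that a maximization-based tuner can drive it. A hedged sketch of plugging it into the bayes_opt package follows; the package import, bounds, and iteration budget are assumptions, and x0/y0/x1/y1 are the module-level train/validation splits the function already uses.

from bayes_opt import BayesianOptimization   # assumes: pip install bayesian-optimization

optimizer = BayesianOptimization(
    f=extratreescv,
    pbounds={                                 # illustrative search ranges
        'n_estimators': (200, 1000),
        'min_samples_split': (2, 20),
        'min_samples_leaf': (1, 10),
        'max_features': (5, 50),
        'max_depth': (5, 30),
        'min_weight_fraction_leaf': (0.0, 0.2),
    },
    random_state=1234,
)
optimizer.maximize(init_points=5, n_iter=25)
print(optimizer.max)                          # best score (negative log loss) and parameters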
Source: predict_2017_06_27_2.py (project: mlbootcamp_5, author: ivan-filonov)
def xgb_base(train2, y, test2, v, z, xgb_params, N_splits, N_seeds, cname, base_seed=42):
    v[cname], z[cname] = 0, 0
    scores = []
    skf = model_selection.StratifiedKFold(n_splits=N_splits, shuffle=True)
    dtest = xgb.DMatrix(test2)
    for s in range(N_seeds):
        xgb_params['seed'] = s + base_seed
        for n, (itrain, ival) in enumerate(skf.split(train2, y)):
            dtrain = xgb.DMatrix(train2.ix[itrain], y[itrain])
            dvalid = xgb.DMatrix(train2.ix[ival], y[ival])
            watch = [(dtrain, 'train'), (dvalid, 'valid')]
            clf = xgb.train(xgb_params, dtrain, 10000, watch, early_stopping_rounds=100, verbose_eval=False)

            p = clf.predict(dvalid)
            v.loc[ival, cname] += pconvert(p)
            score = metrics.log_loss(y[ival], p)
            z[cname]  += pconvert(clf.predict(dtest))
            print(cname, 'seed %d step %d of %d: '%(xgb_params['seed'], n+1, skf.n_splits), score, now())
            scores.append(score)

    z[cname] /= N_splits * N_seeds
    v[cname] /= N_seeds
    print('validation loss: ', metrics.log_loss(y, prestore(v[cname])))
    cv=np.array(scores)
    print(cv, cv.mean(), cv.std())
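
A hypothetical invocation of xgb_base is sketched below; the parameter values are illustrative, and train2/test2/y, the pconvert/prestore transforms, and now() are helpers defined elsewhere in the project.

import pandas as pd

xgb_params = {
    'objective': 'binary:logistic',
    'eval_metric': 'logloss',
    'eta': 0.05,
    'max_depth': 4,
}
v = pd.DataFrame(index=train2.index)   # collects out-of-fold validation predictions
z = pd.DataFrame(index=test2.index)    # collects averaged test predictions
xgb_base(train2, y, test2, v, z, xgb_params, N_splits=5, N_seeds=3, cname='xgb1')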
Source: libscores.py (project: AutoML5, author: djajetic)
def pac_metric (solution, prediction, task='binary.classification'):
    ''' Probabilistic Accuracy based on log_loss metric. 
    We assume the solution is in {0, 1} and prediction in [0, 1].
    Otherwise, run normalize_array.''' 
    debug_flag=False
    [sample_num, label_num] = solution.shape
    if label_num==1: task='binary.classification'
    eps = 1e-15
    the_log_loss = log_loss(solution, prediction, task)
    # Compute the base log loss (using the prior probabilities)    
    pos_num = 1.* sum(solution) # float conversion!
    frac_pos = pos_num / sample_num # prior proba of positive class
    the_base_log_loss = prior_log_loss(frac_pos, task)
    # Alternative computation of the same thing (slower)    
    # Should always return the same thing except in the multi-label case
    # For which the analytic solution makes more sense
    if debug_flag:
        base_prediction = np.empty(prediction.shape)
        for k in range(sample_num): base_prediction[k,:] = frac_pos
        base_log_loss = log_loss(solution, base_prediction, task)  
        diff = np.array(abs(the_base_log_loss-base_log_loss))
        if len(diff.shape)>0: diff=max(diff)
        if(diff)>1e-10: 
            print('Arrggh {} != {}'.format(the_base_log_loss,base_log_loss))
    # Exponentiate to turn into an accuracy-like score.
    # In the multi-label case, we need to average AFTER taking the exp 
    # because it is an NL operation
    pac = mvmean(np.exp(-the_log_loss)) 
    base_pac = mvmean(np.exp(-the_base_log_loss))
    # Normalize: 0 for random, 1 for perfect    
    score = (pac - base_pac) / sp.maximum(eps, (1 - base_pac))
    return score
Source: libscores.py (project: AutoML5, author: djajetic)
def log_loss(solution, prediction, task = 'binary.classification'):
    ''' Log loss for binary and multiclass. '''
    [sample_num, label_num] = solution.shape
    eps = 1e-15

    pred = np.copy(prediction) # beware: changes in prediction occur through this
    sol = np.copy(solution)
    if (task == 'multiclass.classification') and (label_num>1):
        # Make sure the lines add up to one for multi-class classification
        norma = np.sum(prediction, axis=1)
        for k in range(sample_num):
            pred[k,:] /= sp.maximum (norma[k], eps) 
        # Make sure there is a single label active per line for multi-class classification
        sol = binarize_predictions(solution, task='multiclass.classification')
        # For the base prediction, this solution is ridiculous in the multi-label case

    # Bounding of predictions to avoid log(0),1/0,...
    pred = sp.minimum (1-eps, sp.maximum (eps, pred))
    # Compute the log loss    
    pos_class_log_loss = - mvmean(sol*np.log(pred), axis=0)
    if (task != 'multiclass.classification') or (label_num==1):
        # The multi-label case is a bunch of binary problems.
        # The second class is the negative class for each column.
        neg_class_log_loss = - mvmean((1-sol)*np.log(1-pred), axis=0)
        log_loss = pos_class_log_loss + neg_class_log_loss
        # Each column is an independent problem, so we average.
        # The probabilities in one line do not add up to one.
        # log_loss = mvmean(log_loss) 
        # print('binary {}'.format(log_loss))
        # In the multilabel case, the right thing is to AVERAGE, not sum
        # We return all the scores so we can normalize correctly later on
    else:
        # For the multiclass case the probabilities in one line add up to one.
        log_loss = pos_class_log_loss
        # We sum the contributions of the columns.
        log_loss = np.sum(log_loss) 
        #print('multiclass {}'.format(log_loss))
    return log_loss
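
For a single binary column, the positive- and negative-class terms above reduce to the familiar averaged log loss, which is easy to verify by hand on toy numbers:

import numpy as np

sol = np.array([1.0, 0.0])                          # true labels
pred = np.array([0.8, 0.3])                         # predicted P(class 1)

pos_term = -np.mean(sol * np.log(pred))             # -log(0.8) / 2 ~= 0.1116
neg_term = -np.mean((1 - sol) * np.log(1 - pred))   # -log(0.7) / 2 ~= 0.1783
print(pos_term + neg_term)                          # ~0.2899, the same value sklearn's log_loss reports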
Source: libscores.py (project: AutoML5, author: djajetic)
def log_loss_(solution, prediction):
    return metrics.log_loss(solution, prediction)
Source: common_defs.py (project: hyperband, author: zygmuntz)
def train_and_eval_sklearn_classifier( clf, data ):

    x_train = data['x_train']
    y_train = data['y_train']

    x_test = data['x_test']
    y_test = data['y_test'] 

    clf.fit( x_train, y_train ) 

    try:
        p = clf.predict_proba( x_train )[:,1]   # sklearn convention
    except IndexError:
        p = clf.predict_proba( x_train )

    ll = log_loss( y_train, p )
    auc = AUC( y_train, p )
    acc = accuracy( y_train, np.round( p ))

    print "\n# training | log loss: {:.2%}, AUC: {:.2%}, accuracy: {:.2%}".format( ll, auc, acc )

    #

    try:
        p = clf.predict_proba( x_test )[:,1]    # sklearn convention
    except IndexError:
        p = clf.predict_proba( x_test )

    ll = log_loss( y_test, p )
    auc = AUC( y_test, p )
    acc = accuracy( y_test, np.round( p ))

    print "# testing  | log loss: {:.2%}, AUC: {:.2%}, accuracy: {:.2%}".format( ll, auc, acc ) 

    #return { 'loss': 1 - auc, 'log_loss': ll, 'auc': auc }
    return { 'loss': ll, 'log_loss': ll, 'auc': auc }

###

# "clf", even though it's a regressor
Source: classifier_utils.py (project: human-rl, author: gsastry)
def predict_proba_with_loss(self, X, y):
        y_pred = self.predict_proba(X)
        loss = log_loss(y,y_pred)
        return y_pred, loss

    # smallest prob given to an actual catastrophe
Source: libscores.py (project: AutoML4, author: djajetic)
def pac_metric (solution, prediction, task='binary.classification'):
    ''' Probabilistic Accuracy based on log_loss metric. 
    We assume the solution is in {0, 1} and prediction in [0, 1].
    Otherwise, run normalize_array.''' 
    debug_flag=False
    [sample_num, label_num] = solution.shape
    if label_num==1: task='binary.classification'
    eps = 1e-15
    the_log_loss = log_loss(solution, prediction, task)
    # Compute the base log loss (using the prior probabilities)    
    pos_num = 1.* sum(solution) # float conversion!
    frac_pos = pos_num / sample_num # prior proba of positive class
    the_base_log_loss = prior_log_loss(frac_pos, task)
    # Alternative computation of the same thing (slower)    
    # Should always return the same thing except in the multi-label case
    # For which the analytic solution makes more sense
    if debug_flag:
        base_prediction = np.empty(prediction.shape)
        for k in range(sample_num): base_prediction[k,:] = frac_pos
        base_log_loss = log_loss(solution, base_prediction, task)  
        diff = np.array(abs(the_base_log_loss-base_log_loss))
        if len(diff.shape)>0: diff=max(diff)
        if(diff)>1e-10: 
            print('Arrggh {} != {}'.format(the_base_log_loss,base_log_loss))
    # Exponentiate to turn into an accuracy-like score.
    # In the multi-label case, we need to average AFTER taking the exp 
    # because it is an NL operation
    pac = mvmean(np.exp(-the_log_loss)) 
    base_pac = mvmean(np.exp(-the_base_log_loss))
    # Normalize: 0 for random, 1 for perfect    
    score = (pac - base_pac) / sp.maximum(eps, (1 - base_pac))
    return score
Source: libscores.py (project: AutoML4, author: djajetic)
def log_loss(solution, prediction, task = 'binary.classification'):
    ''' Log loss for binary and multiclass. '''
    [sample_num, label_num] = solution.shape
    eps = 1e-15

    pred = np.copy(prediction) # beware: changes in prediction occur through this
    sol = np.copy(solution)
    if (task == 'multiclass.classification') and (label_num>1):
        # Make sure the lines add up to one for multi-class classification
        norma = np.sum(prediction, axis=1)
        for k in range(sample_num):
            pred[k,:] /= sp.maximum (norma[k], eps) 
        # Make sure there is a single label active per line for multi-class classification
        sol = binarize_predictions(solution, task='multiclass.classification')
        # For the base prediction, this solution is ridiculous in the multi-label case

    # Bounding of predictions to avoid log(0),1/0,...
    pred = sp.minimum (1-eps, sp.maximum (eps, pred))
    # Compute the log loss    
    pos_class_log_loss = - mvmean(sol*np.log(pred), axis=0)
    if (task != 'multiclass.classification') or (label_num==1):
        # The multi-label case is a bunch of binary problems.
        # The second class is the negative class for each column.
        neg_class_log_loss = - mvmean((1-sol)*np.log(1-pred), axis=0)
        log_loss = pos_class_log_loss + neg_class_log_loss
        # Each column is an independent problem, so we average.
        # The probabilities in one line do not add up to one.
        # log_loss = mvmean(log_loss) 
        # print('binary {}'.format(log_loss))
        # In the multilabel case, the right thing is to AVERAGE, not sum
        # We return all the scores so we can normalize correctly later on
    else:
        # For the multiclass case the probabilities in one line add up to one.
        log_loss = pos_class_log_loss
        # We sum the contributions of the columns.
        log_loss = np.sum(log_loss) 
        #print('multiclass {}'.format(log_loss))
    return log_loss
Source: libscores.py (project: AutoML4, author: djajetic)
def log_loss_(solution, prediction):
    return metrics.log_loss(solution, prediction)
Source: libscores.py (project: automl_gpu, author: abhishekkrthakur)
def pac_metric (solution, prediction, task='binary.classification'):
    ''' Probabilistic Accuracy based on log_loss metric. 
    We assume the solution is in {0, 1} and prediction in [0, 1].
    Otherwise, run normalize_array.''' 
    debug_flag=False
    [sample_num, label_num] = solution.shape
    if label_num==1: task='binary.classification'
    eps = 1e-15
    the_log_loss = log_loss(solution, prediction, task)
    # Compute the base log loss (using the prior probabilities)    
    pos_num = 1.* sum(solution) # float conversion!
    frac_pos = pos_num / sample_num # prior proba of positive class
    the_base_log_loss = prior_log_loss(frac_pos, task)
    # Alternative computation of the same thing (slower)    
    # Should always return the same thing except in the multi-label case
    # For which the analytic solution makes more sense
    if debug_flag:
        base_prediction = np.empty(prediction.shape)
        for k in range(sample_num): base_prediction[k,:] = frac_pos
        base_log_loss = log_loss(solution, base_prediction, task)  
        diff = np.array(abs(the_base_log_loss-base_log_loss))
        if len(diff.shape)>0: diff=max(diff)
        if(diff)>1e-10: 
            print('Arrggh {} != {}'.format(the_base_log_loss,base_log_loss))
    # Exponentiate to turn into an accuracy-like score.
    # In the multi-label case, we need to average AFTER taking the exp 
    # because it is an NL operation
    pac = mvmean(np.exp(-the_log_loss)) 
    base_pac = mvmean(np.exp(-the_base_log_loss))
    # Normalize: 0 for random, 1 for perfect    
    score = (pac - base_pac) / sp.maximum(eps, (1 - base_pac))
    return score
Source: libscores.py (project: automl_gpu, author: abhishekkrthakur)
def log_loss(solution, prediction, task = 'binary.classification'):
    ''' Log loss for binary and multiclass. '''
    [sample_num, label_num] = solution.shape
    eps = 1e-15

    pred = np.copy(prediction) # beware: changes in prediction occur through this
    sol = np.copy(solution)
    if (task == 'multiclass.classification') and (label_num>1):
        # Make sure the lines add up to one for multi-class classification
        norma = np.sum(prediction, axis=1)
        for k in range(sample_num):
            pred[k,:] /= sp.maximum (norma[k], eps) 
        # Make sure there is a single label active per line for multi-class classification
        sol = binarize_predictions(solution, task='multiclass.classification')
        # For the base prediction, this solution is ridiculous in the multi-label case

    # Bounding of predictions to avoid log(0),1/0,...
    pred = sp.minimum (1-eps, sp.maximum (eps, pred))
    # Compute the log loss    
    pos_class_log_loss = - mvmean(sol*np.log(pred), axis=0)
    if (task != 'multiclass.classification') or (label_num==1):
        # The multi-label case is a bunch of binary problems.
        # The second class is the negative class for each column.
        neg_class_log_loss = - mvmean((1-sol)*np.log(1-pred), axis=0)
        log_loss = pos_class_log_loss + neg_class_log_loss
        # Each column is an independent problem, so we average.
        # The probabilities in one line do not add up to one.
        # log_loss = mvmean(log_loss) 
        # print('binary {}'.format(log_loss))
        # In the multilabel case, the right thing is to AVERAGE, not sum
        # We return all the scores so we can normalize correctly later on
    else:
        # For the multiclass case the probabilities in one line add up to one.
        log_loss = pos_class_log_loss
        # We sum the contributions of the columns.
        log_loss = np.sum(log_loss) 
        #print('multiclass {}'.format(log_loss))
    return log_loss
Source: libscores.py (project: automl_gpu, author: abhishekkrthakur)
def log_loss_(solution, prediction):
    return metrics.log_loss(solution, prediction)
Source: deepFM.py (project: DeepFM, author: dwt0317)
def predict_test_file(preds, sess, test_file, feature_cnt, _indices, _values, _values2, _cont_values, _text_values, _shape,
                      _cont_shape, _text_shape, _y, _ind, epoch, batch_size, tag, path, output_prediction=True):
    day = date.today()
    if output_prediction:
        wt = open(path + '/'+str(day)+'_deepFM_pred_' + tag + str(epoch) + '.txt', 'w')

    gt_scores = []
    pred_scores = []

    for test_input_in_sp in load_data_cache(test_file):
        predictios = sess.run(preds, feed_dict={
            _indices: test_input_in_sp['indices'], _values: test_input_in_sp['values'],
            _shape: test_input_in_sp['shape'], _cont_shape: test_input_in_sp['cont_shape'],
            _text_values: test_input_in_sp['text_values'], _text_shape: test_input_in_sp['text_shape'],

            _y: test_input_in_sp['labels'], _values2: test_input_in_sp['values2'],
            _cont_values: test_input_in_sp['cont_values'], _ind: test_input_in_sp['feature_indices']
        }).reshape(-1).tolist()

        if output_prediction:
            for (gt, preded) in zip(test_input_in_sp['labels'].reshape(-1).tolist(), predictios):
                wt.write('{0:d},{1:f}\n'.format(int(gt), preded))
                gt_scores.append(gt)
                # pred_scores.append(1.0 if preded >= 0.5 else 0.0)
                pred_scores.append(preded)
        else:
            gt_scores.extend(test_input_in_sp['labels'].reshape(-1).tolist())
            pred_scores.extend(predictios)
    auc = metrics.roc_auc_score(np.asarray(gt_scores), np.asarray(pred_scores))
    logloss = metrics.log_loss(np.asarray(gt_scores), np.asarray(pred_scores))
    # print('auc is ', auc, ', at epoch  ', epoch)
    if output_prediction:
        wt.close()
    return auc, logloss
Source: classification.py (project: aboleth, author: data61)
def print_k_result(ys, Ep, ll, acc, name):
    acc.append(accuracy_score(ys, Ep.argmax(axis=1)))
    ll.append(log_loss(ys, Ep))
    print("{}: accuracy = {:.4g}, log-loss = {:.4g}"
          .format(name, acc[-1], ll[-1]))
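
A minimal usage sketch with made-up arrays follows; accuracy_score and log_loss are assumed to be imported from sklearn.metrics as in the source module, and acc / ll are the running per-fold lists the helper appends to.

import numpy as np

ys = np.array([0, 1, 2, 1])             # true class labels for one fold
Ep = np.array([[0.8, 0.1, 0.1],         # predicted class probabilities, one row per sample
               [0.2, 0.6, 0.2],
               [0.1, 0.2, 0.7],
               [0.3, 0.4, 0.3]])
acc, ll = [], []
print_k_result(ys, Ep, ll, acc, name="toy fold")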
Source: kera.py (project: Quantrade, author: quant-trade)
def main():
    validate = True
    n = SData(validate=validate)

    Xtrain = n.train_features.as_matrix()
    ytrain = n.train_targets
    Xtest = n.test_features.as_matrix()
    ytest = n.test_targets

    Xtrain = np.reshape(Xtrain, (Xtrain.shape[0], Xtrain.shape[1], 1))
    Xtest  = np.reshape(Xtest, (Xtest.shape[0],  Xtest.shape[1], 1))

    rnn = RNN([1, 100, 100, 1])
    rnn.fit(Xtrain, ytrain)
    p = rnn.predict(Xtest)
    p_prob = rnn.predict(Xtest)

    if validate:
        mse = mean_squared_error(ytest, p)
        print("MSE: {}".format(mse))
        loss = log_loss(ytest, p_prob)
        print("Log loss: {}".format(loss))
    else:
        base_path = dirname(__file__)
        results_df = DataFrame(data={'probability':results})
        joined = DataFrame(t_id).join(results_df)
        joined.to_csv(join(base_path, 'results', 'dl.csv'), index=False)
Source: metrics.py (project: molearn, author: jmread)
def Log_loss(Ytest,Ydist):
    return log_loss(Ytest, Ydist, eps=1e-15, normalize=True)
#    N_test,L = Ytest.shape
#    return sum((Ytest == Ypred) * 1.) / N_test / L
Source: FM.py (project: nfm, author: faychu)
def parse_args():
    parser = argparse.ArgumentParser(description="Run FM.")
    parser.add_argument('--path', nargs='?', default='./data/',
                        help='Input data path.')
    parser.add_argument('--dataset', nargs='?', default='frappe',
                        help='Choose a dataset.')
    parser.add_argument('--epoch', type=int, default=100,
                        help='Number of epochs.')
    parser.add_argument('--pretrain', type=int, default=-1,
                        help='flag for pretrain. 1: initialize from pretrain; 0: randomly initialize; -1: save the model to pretrain file')
    parser.add_argument('--batch_size', type=int, default=128,
                        help='Batch size.')
    parser.add_argument('--hidden_factor', type=int, default=64,
                        help='Number of hidden factors.')
    parser.add_argument('--lamda', type=float, default=0,
                        help='Regularizer for bilinear part.')
    parser.add_argument('--keep_prob', type=float, default=0.5, 
                    help='Keep probility (1-dropout_ratio) for the Bi-Interaction layer. 1: no dropout')
    parser.add_argument('--lr', type=float, default=0.05,
                        help='Learning rate.')
    parser.add_argument('--loss_type', nargs='?', default='square_loss',
                        help='Specify a loss type (square_loss or log_loss).')
    parser.add_argument('--optimizer', nargs='?', default='AdagradOptimizer',
                        help='Specify an optimizer type (AdamOptimizer, AdagradOptimizer, GradientDescentOptimizer, MomentumOptimizer).')
    parser.add_argument('--verbose', type=int, default=1,
                        help='Show the results per X epochs (0, 1 ... any positive integer)')
    parser.add_argument('--batch_norm', type=int, default=0,
                    help='Whether to perform batch normaization (0 or 1)')

    return parser.parse_args()

