# Module-level setup assumed by this excerpt:
import logging

import numpy as np
from scipy.special import logit
from sklearn.linear_model import LassoCV

logger = logging.getLogger(__name__)


def run_lasso(X, y, max_iter=3000, cv=5, n_threads=1):
    """Run sklearn's LassoCV on the logit-transformed mutation rate.

    Args:
        X (np.array): scaled feature matrix.
        y (pd.DataFrame): four-column response table (nMut, length, N, ...).
        max_iter (int): maximum number of iterations.
        cv (int): number of CV folds.
        n_threads (int): number of threads for parallel computation.

    Returns:
        float: the alpha value selected by cross-validation.
    """
    logger.info('Implementing LassoCV with {} iter. and {}-fold CV'.format(max_iter, cv))
    # Generate the logit-transformed response: mutation count (with a 0.5
    # pseudocount) divided by the number of possible mutated positions.
    y_logit = logit((y.nMut + 0.5) / (y.length * y.N))
    # Sub-sample 300,000 rows of X and y to keep the fit tractable.
    use_ix = np.random.choice(y_logit.shape[0], 300000, replace=False)
    Xsub = X[use_ix, :]
    ysub = y_logit[use_ix]
    reg = LassoCV(max_iter=max_iter, cv=cv, copy_X=False, n_jobs=n_threads)
    lassocv = reg.fit(Xsub, ysub)
    logger.info('LassoCV alpha = {}'.format(lassocv.alpha_))
    return lassocv.alpha_
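
A minimal usage sketch. The shapes, column names, and values below are illustrative, not from the original project; the only requirement is that y has nMut, length, and N columns and at least 300,000 rows:

import numpy as np
import pandas as pd

rng = np.random.default_rng(0)
n_rows = 500000
X = rng.standard_normal((n_rows, 10))
y = pd.DataFrame({'nMut': rng.poisson(2, n_rows),
                  'length': np.full(n_rows, 1000),
                  'N': np.full(n_rows, 100)})
alpha = run_lasso(X, y, max_iter=1000, cv=5, n_threads=4)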
Python logit() usage examples (source code):
def find_coeffs_bin(self, budget):
    """Recover the weights of a binary logistic model from `budget` queries."""
    k = len(self.classes)  # number of classes
    assert k == 2
    n = self.num_features()  # feature-vector dimension
    X_train = self.gen_query_set(n, budget)
    # logit() inverts the sigmoid, turning queried probabilities into the
    # model's linear decision values.
    y = logit(self.query_probas(X_train)[:, 1])
    # Append a column of ones so the intercept is solved for jointly.
    X = np.hstack((X_train, np.ones((budget, 1))))
    if budget == n + 1:
        # Square system: try an exact solve, fall back to least squares.
        try:
            w_opt = np.linalg.solve(X, y).T
        except np.linalg.LinAlgError:
            w_opt = np.linalg.lstsq(X, y)[0].T
    else:
        w_opt = np.linalg.lstsq(X, y)[0].T
    int_opt = w_opt[-1]
    w_opt = np.array([w_opt[:-1]])
    self.X_train = X_train
    return w_opt, int_opt
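
A self-contained sketch of the same idea outside the class: for a logistic model p(x) = sigmoid(w.x + b), logit(p(x)) = w.x + b is linear in x, so n+1 probability queries recover w and b exactly (all names and values here are hypothetical):

import numpy as np
from scipy.special import expit, logit

rng = np.random.default_rng(0)
n = 5
w_true, b_true = rng.standard_normal(n), 0.3
X_q = rng.standard_normal((n + 1, n))          # n+1 query points
p = expit(X_q @ w_true + b_true)               # queried probabilities
A = np.hstack([X_q, np.ones((n + 1, 1))])
coef = np.linalg.solve(A, logit(p))            # exact recovery
assert np.allclose(coef[:-1], w_true) and np.isclose(coef[-1], b_true)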
def predict(st, norm, bounds):
    # Log-transform the reward column, then min-max scale it into (0, 1)
    # and map it onto the real line with logit().
    rew = np.log(1 + st[:, -1:])
    a_x = bounds[0]  # lower bound of the log-reward
    b_x = bounds[2]  # upper bound of the log-reward
    eps = 1e-5
    # Clip away from the bounds so logit() stays finite.
    rew = np.clip(rew, a_x + eps, b_x - eps)
    rew = logit((rew - a_x) / (b_x - a_x))
    st[:, -1:] = rew
    state = np.zeros((1, 61))
    state[0, :] = np.hstack((st[0, 0], st[:, [1, 2, 3, -1]].ravel()))
    X = (state - norm[0]) / norm[1]
    return np.round(policy_network(X)[0, :], 4)
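
A small sketch of the bounded-logit transform used above, in isolation (bounds and values are illustrative): squash a value from [a, b] into (0, 1), then logit() maps it to an unbounded feature, and expit() inverts the whole thing:

import numpy as np
from scipy.special import expit, logit

a, b, eps = 0.0, 10.0, 1e-5
x = np.clip(np.array([0.0, 2.5, 10.0]), a + eps, b - eps)
z = logit((x - a) / (b - a))       # unbounded representation
x_back = a + (b - a) * expit(z)    # round-trip
assert np.allclose(x, x_back)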
def test_logistic_lmm():
    df = pd.read_csv(os.path.join(get_resource_path(), 'sample_data.csv'))
    model = Lmer('DV_l ~ IV1 + (IV1|Group)', data=df, family='binomial')
    model.fit(summarize=False)
    assert model.coefs.shape == (2, 13)
    estimates = np.array([-0.16098421, 0.00296261])
    assert np.allclose(model.coefs['Estimate'], estimates, atol=.001)
    assert isinstance(model.fixef, pd.core.frame.DataFrame)
    assert model.fixef.shape == (47, 2)
    assert isinstance(model.ranef, pd.core.frame.DataFrame)
    assert model.ranef.shape == (47, 2)
    assert np.allclose(model.coefs.loc[:, 'Estimate'], model.fixef.mean(), atol=.01)
    # Test predictions on the response (probability) and link (logit) scales.
    assert np.allclose(model.predict(model.data, use_rfx=True), model.data.fits)
    assert np.allclose(model.predict(model.data, use_rfx=True, pred_type='link'),
                       logit(model.data.fits))
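
The last assertion relies on the logit link being the inverse of the binomial response function; the same identity, library-independent:

import numpy as np
from scipy.special import expit, logit

eta = np.array([-1.2, 0.0, 0.7])   # linear predictor (link scale)
p = expit(eta)                     # fitted probabilities (response scale)
assert np.allclose(logit(p), eta)  # logit() recovers the link scale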
def compute_sgd(data):
    logging.info('Computing SGD')
    n_splits = 10
    folder = StratifiedKFold(n_splits=n_splits, shuffle=True)
    for ix_first, ix_second in tqdm_notebook(
            folder.split(np.zeros(data['y_train'].shape[0]), data['y_train']),
            total=n_splits):
        # {'en__l1_ratio': 0.0001, 'en__alpha': 1e-05}
        model = SGDClassifier(
            loss='log',          # logistic regression; renamed 'log_loss' in sklearn >= 1.1
            penalty='elasticnet',
            fit_intercept=True,
            n_iter=100,          # replaced by max_iter in modern scikit-learn
            shuffle=True,
            n_jobs=-1,
            l1_ratio=0.0001,
            alpha=1e-05,
            class_weight=None)
        model = model.fit(data['X_train'][ix_first, :], data['y_train'][ix_first])
        # Store out-of-fold and test predictions on the logit scale so that
        # averaging happens in log-odds space rather than probability space.
        data['y_train_pred'][ix_second] = logit(
            model.predict_proba(data['X_train'][ix_second, :])[:, 1])
        data['y_test_pred'].append(logit(model.predict_proba(data['X_test'])[:, 1]))
    data['y_test_pred'] = np.array(data['y_test_pred']).T.mean(axis=1)
    return data
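
Averaging fold predictions on the logit scale, as done above, is not the same as averaging probabilities; the logit mean corresponds to a geometric mean of the odds and pulls harder toward confident predictions. A toy comparison:

import numpy as np
from scipy.special import expit, logit

p_folds = np.array([0.9, 0.99])             # two folds' probabilities
mean_prob = p_folds.mean()                  # 0.945
mean_logit = expit(logit(p_folds).mean())   # ~0.968: geometric mean of the odds
print(mean_prob, mean_logit)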
def preprocess_feature(self, feature, parameters):
    is_not_empty = 1 - np.isclose(feature, normalization.MISSING_VALUE)
    if parameters.feature_type == identify_types.BINARY:
        # Binary features are always 1 unless they are 0
        return ((feature != 0) * is_not_empty).astype(np.float32)
    if parameters.boxcox_lambda is not None:
        feature = stats.boxcox(
            np.maximum(
                feature + parameters.boxcox_shift,
                normalization.BOX_COX_MARGIN
            ), parameters.boxcox_lambda
        )
    # No *= to ensure consistent out-of-place operation.
    if parameters.feature_type == identify_types.PROBABILITY:
        # Clip away from {0, 1} so logit() stays finite.
        feature = np.clip(feature, 0.01, 0.99)
        feature = special.logit(feature)
    elif parameters.feature_type == identify_types.QUANTILE:
        # Map each value to the fraction of quantile boundaries it exceeds.
        quantiles = parameters.quantiles
        values = np.zeros(feature.shape)
        for quantile in quantiles:
            values += feature >= quantile
        feature = values / float(len(quantiles))
    elif parameters.feature_type == identify_types.ENUM:
        # One-hot encode enum features.
        possible_values = parameters.possible_values
        mapping = {}
        for i, possible_value in enumerate(possible_values):
            mapping[possible_value] = i
        output_feature = np.zeros((len(feature), len(possible_values)))
        for i, val in enumerate(feature):
            output_feature[i][mapping[val]] = 1.0
        return output_feature
    else:
        # Continuous features: standardize, then zero out missing entries.
        feature = feature - parameters.mean
        feature /= parameters.stddev
        feature *= is_not_empty
    return feature
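
For the PROBABILITY branch specifically, the clip-then-logit step looks like this in isolation (toy values):

import numpy as np
from scipy.special import logit

p = np.array([0.0, 0.5, 1.0])        # raw probability feature
z = logit(np.clip(p, 0.01, 0.99))    # finite even at the endpoints
# z is approximately [-4.595, 0.0, 4.595]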
def run_rndlasso(X, y, alpha,
                 n_resampling=500, sample_fraction=0.1, n_threads=1):
    """Run sklearn's Randomized Lasso (stability selection).

    Note: RandomizedLasso was removed from scikit-learn in 0.21, so this
    requires an older release.

    Args:
        X (np.array): scaled feature matrix.
        y (pd.DataFrame): four-column response table.
        alpha (float): penalty parameter selected by LassoCV.
        n_resampling (int): number of resampling rounds.
        sample_fraction (float): fraction of data used in each round.
        n_threads (int): number of threads for parallel computation.

    Returns:
        np.array: feature importance (selection) scores.
    """
    logger.info('Implementing Randomized Lasso with alpha={}, n_resampling={} and sample_fraction={}'.
                format(alpha, n_resampling, sample_fraction))
    # Generate the logit-transformed response, as in run_lasso().
    y_logit = logit((y.nMut + 0.5) / (y.length * y.N))
    reg = RandomizedLasso(alpha=alpha,
                          n_resampling=n_resampling,
                          sample_fraction=sample_fraction,
                          selection_threshold=1e-3,
                          max_iter=3000,
                          normalize=False,
                          n_jobs=n_threads)
    rndlasso = reg.fit(X, y_logit)
    fi_scores = rndlasso.scores_
    return fi_scores
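
The two functions are meant to be chained: the CV-selected alpha feeds the stability selection. A sketch, reusing the hypothetical X and y from the run_lasso example above:

alpha = run_lasso(X, y, max_iter=3000, cv=5, n_threads=4)
fi_scores = run_rndlasso(X, y, alpha, n_resampling=500,
                         sample_fraction=0.1, n_threads=4)
top_features = np.argsort(fi_scores)[::-1][:20]   # 20 most stable features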
def itransform(self, y_transformed):
    # Map from the transformed (0, 1) space back to the linear scale.
    yscale = logit(y_transformed)
    return yscale / self.scale
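
This inverse implies a forward transform of the form expit(scale * y); a round-trip sketch under that assumption (scale and values are illustrative):

import numpy as np
from scipy.special import expit, logit

scale = 2.0
y = np.array([-1.0, 0.0, 1.5])
y_transformed = expit(scale * y)        # assumed forward transform
y_back = logit(y_transformed) / scale   # the itransform above
assert np.allclose(y, y_back)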
def munge_scoreses(scoreses, df):
    npredictors = len(scoreses)
    score_shape = (len(df), npredictors)
    scores = np.empty(score_shape, dtype=np.float32)
    # Yay, nested loops :/
    i = 0
    for (uid, pid) in df[['uid', 'pid']].itertuples(index=False):
        for predictor_ix, pdict in enumerate(scoreses):
            prob = pdict[uid][pid]
            # Store each predictor's probability on the logit scale.
            scores[i, predictor_ix] = logit(prob)
        i += 1
    return scores
def vectorize_fold(fold, tags, meta_df, use_metafeats=True):
    with time_me('Loaded pdicts'):
        scoreses = [common.pdict_for_tag(tag, fold) for tag in tags]
    df = meta_df[meta_df['fold'] == fold]
    assert len(df)
    y = df['label']
    n_predictors = len(scoreses)
    with time_me('Munged scores for {} predictors'.format(n_predictors), mode='print'):
        # TODO: could use the logit loading fn added to user_wrapper module
        scores = munge_scoreses(scoreses, df)
    if not use_metafeats:
        X = scores
    else:
        meta_cols = metavectorize.metafeature_columns
        meta = df[meta_cols].values
        # Special f_0 dummy metafeature for learning a vanilla weight term per predictor.
        metafeats = np.hstack([np.ones((len(df), 1)), meta])
        # (np.einsum could express this more directly, and sklearn.preprocessing
        # has a 'PolynomialFeatures' utility that might have been useful here.
        # But this works.)
        n_metafeats = metafeats.shape[1]
        logging.info('{} predictors x {} metafeatures -> {} coefs'.format(
            n_predictors, n_metafeats, n_predictors * n_metafeats))
        # X is 'metafeat major': the first n_predictors values of each row are
        # the raw scores for each predictor, followed by each predictor's
        # score multiplied by the first metafeature, and so on.
        X = np.tile(scores, n_metafeats) * np.repeat(metafeats, n_predictors, axis=1)
    return X, y
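
The tile/repeat trick builds every score-metafeature product without einsum; a toy check of the layout:

import numpy as np

scores = np.array([[1.0, 2.0]])        # one row, 2 predictors
metafeats = np.array([[1.0, 10.0]])    # dummy f_0 plus one metafeature
X = np.tile(scores, 2) * np.repeat(metafeats, 2, axis=1)
# 'metafeat major': [s1*f0, s2*f0, s1*f1, s2*f1]
assert (X == np.array([[1.0, 2.0, 10.0, 20.0]])).all()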
def fit(self, X, s):
    # Augment X with a ones column, then solve a linear least-squares
    # problem against the logit-transformed targets.
    _x = np.ones((X.shape[0], X.shape[1] + 1))
    _x[:, :-1] = X
    self.w, _, _, _ = np.linalg.lstsq(_x, logit(s))
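
Standalone, the same least-squares-on-logits fit recovers the weights of a noiseless sigmoid model exactly (synthetic data, hypothetical names):

import numpy as np
from scipy.special import expit, logit

rng = np.random.default_rng(0)
X = rng.standard_normal((200, 3))
w_true = np.array([0.5, -1.0, 2.0, 0.25])    # last entry is the bias
Xb = np.hstack([X, np.ones((200, 1))])
s = expit(Xb @ w_true)                       # observed scores in (0, 1)
w, *_ = np.linalg.lstsq(Xb, logit(s), rcond=None)
assert np.allclose(w, w_true)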
From distribution_util_test.py, project DeepLearning_VirtualReality_BigData_Project (author: rashmitripathi):
def testGetLogitsAndProbsLogits(self):
    p = np.array([0.01, 0.2, 0.5, 0.7, .99], dtype=np.float32)
    logits = special.logit(p)
    with self.test_session():
        new_logits, new_p = distribution_util.get_logits_and_probs(
            logits=logits, validate_args=True)
        self.assertAllClose(p, new_p.eval())
        self.assertAllClose(logits, new_logits.eval())
def testGetLogitsAndProbsProbability(self):
    p = np.array([0.01, 0.2, 0.5, 0.7, .99], dtype=np.float32)
    with self.test_session():
        new_logits, new_p = distribution_util.get_logits_and_probs(
            probs=p, validate_args=True)
        self.assertAllClose(special.logit(p), new_logits.eval())
        self.assertAllClose(p, new_p.eval())
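
Both tests hinge on logit() and the sigmoid being exact inverses; the scipy pair alone:

import numpy as np
from scipy.special import expit, logit

p = np.array([0.01, 0.2, 0.5, 0.7, 0.99], dtype=np.float32)
assert np.allclose(expit(logit(p)), p)   # expit inverts logit
assert logit(0.5) == 0.0                 # log-odds of 0.5 is zero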