def predict(self, new_df):
""" Use the estimated model to make predictions. \
New levels of grouping factors are given fixed effects,
with zero random effects
Args:
new_df (DataFrame): data to make predictions on
Returns:
n x J matrix, where n is the number of rows \
of new_df and J is the number \
of possible response values. The (i, j) entry of \
this matrix is the probability that observation i \
realizes response level j.
"""
eta = super(CumulativeLogisticRegression, self).predict(new_df)
intercepts = self.effects['intercepts']
J = self.J
preds = np.zeros((len(eta), J))
preds[:, 0] = expit(intercepts[0] + eta)
preds[:, J - 1] = 1.0 - expit(intercepts[J - 2] + eta)
for j in range(1, J - 1):
preds[:, j] = expit(intercepts[j] + eta) - \
expit(intercepts[j - 1] + eta)
return preds
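To make the mapping from linear predictor to category probabilities concrete, here is a minimal standalone sketch with made-up intercepts and eta values (not taken from the class above); each row of the resulting matrix sums to one.

import numpy as np
from scipy.special import expit

# Hypothetical values: J = 3 ordered response levels, two observations.
intercepts = np.array([-1.0, 1.0])   # J - 1 ordered intercepts
eta = np.array([0.5, -0.2])          # linear predictor for each observation

J = len(intercepts) + 1
preds = np.zeros((len(eta), J))
preds[:, 0] = expit(intercepts[0] + eta)                # P(Y = 1) = P(Y <= 1)
preds[:, J - 1] = 1.0 - expit(intercepts[J - 2] + eta)  # P(Y = J) = 1 - P(Y <= J-1)
for j in range(1, J - 1):
    # P(Y = j+1) = P(Y <= j+1) - P(Y <= j)
    preds[:, j] = expit(intercepts[j] + eta) - expit(intercepts[j - 1] + eta)

print(preds.sum(axis=1))  # [1. 1.] -- probabilities in each row sum to one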
Python expit() usage examples (source code)
def _mu(distr, z, eta):
"""The non-linearity (inverse link)."""
if distr in ['softplus', 'gamma']:
mu = np.log1p(np.exp(z))
elif distr == 'poisson':
mu = z.copy()
intercept = (1 - eta) * np.exp(eta)
mu[z > eta] = z[z > eta] * np.exp(eta) + intercept
mu[z <= eta] = np.exp(z[z <= eta])
elif distr == 'gaussian':
mu = z
elif distr == 'binomial':
mu = expit(z)
elif distr == 'probit':
mu = norm.cdf(z)
return mu
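The Poisson branch replaces exp(z) with its tangent line beyond the threshold eta, so the inverse link cannot overflow for large z. A quick sketch (with an arbitrary eta chosen only for illustration) confirming the two pieces meet continuously at z = eta:

import numpy as np

eta = 2.0  # arbitrary threshold for illustration
z = np.array([eta - 1e-6, eta + 1e-6])
mu = np.where(z > eta, z * np.exp(eta) + (1 - eta) * np.exp(eta), np.exp(z))
print(np.allclose(mu[0], mu[1], atol=1e-4))  # True: exp(z) and its tangent agree at z = eta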
def logistic_regression_cost_gradient(parameters, input, output):
"""
Cost and gradient for logistic regression
:param parameters: weight vector
:param input: feature vector
:param output: binary label (0 or 1)
:return: cost and gradient for the input and output
"""
prediction = expit(np.dot(input, parameters))
if output:
inside_log = prediction
else:
inside_log = 1.0 - prediction
    if inside_log != 0.0:
        cost = -np.log(inside_log)
    else:
        # -log(0) diverges to +infinity; cap the cost at the largest representable float
        cost = np.finfo(float).max
gradient = (prediction - output) * input
return cost, gradient
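The gradient follows from differentiating the negative log-likelihood: d/dw [-log p] = (expit(x . w) - y) x. A self-contained finite-difference check of that formula (restating the cost inline with made-up data rather than importing the function above):

import numpy as np
from scipy.special import expit

rng = np.random.default_rng(0)
w = rng.normal(size=5)
x = rng.normal(size=5)
y = 1

def cost(w):
    p = expit(np.dot(x, w))
    return -np.log(p if y else 1.0 - p)

analytic = (expit(np.dot(x, w)) - y) * x
numeric = np.array([(cost(w + e) - cost(w - e)) / (2e-6) for e in 1e-6 * np.eye(5)])
print(np.allclose(analytic, numeric, atol=1e-6))  # True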
def assertLogisticRegression(self, sampler):
data_size = 3
input_size = 5
inputs = np.random.uniform(-10.0, 10.0, size=(data_size, input_size))
outputs = np.random.randint(0, 2, size=data_size)
initial_parameters = np.random.normal(scale=1e-5, size=input_size)
# Create cost and gradient function for gradient descent and check its gradient
cost_gradient = bind_cost_gradient(logistic_regression_cost_gradient,
inputs, outputs, sampler=sampler)
result = gradient_check(cost_gradient, initial_parameters)
self.assertEqual([], result)
# Train logistic regression and see if it predicts correct labels
final_parameters, cost_history = gradient_descent(cost_gradient, initial_parameters, 100)
predictions = expit(np.dot(inputs, final_parameters)) > 0.5
    # Binary classification of 3 data points in 5 dimensions is always linearly separable
for output, prediction in zip(outputs, predictions):
self.assertEqual(output, prediction)
Source: precompute_probs.py from the instacart-basket-prediction project (author: colinmorris)
def get_probmap(model, sess):
"""{uid -> {pid -> prob}}"""
# Start a fresh pass through the validation data
sess.run(model.dataset.new_epoch_op())
pmap = defaultdict(dict)
i = 0
nseqs = 0
to_fetch = [model.lastorder_logits, model.dataset['uid'], model.dataset['pid']]
while 1:
try:
final_logits, uids, pids = sess.run(to_fetch)
except tf.errors.OutOfRangeError:
break
batch_size = len(uids)
nseqs += batch_size
final_probs = expit(final_logits)
for uid, pid, prob in zip(uids, pids, final_probs):
pmap[uid][pid] = prob
i += 1
tf.logging.info("Computed probabilities for {} users over {} sequences in {} batches".format(
len(pmap), nseqs, i
))
return pmap
Source: precompute_probs.py from the instacart-basket-prediction project (author: colinmorris)
def main():
parser = argparse.ArgumentParser()
parser.add_argument('--tag', default='pairs')
parser.add_argument('--fold', default='test')
args = parser.parse_args()
#metavec = load_metavectors(args.fold)
clf = train.load_model(args.tag)
X, y = vectorize.load_fold(args.fold, args.tag)
if hasattr(clf, 'predict_proba'):
probs = clf.predict_proba(X)
# returns an array of shape (n, 2), where each len-2 subarray
# has the probability of the negative and positive classes. which is silly.
probs = probs[:,1]
else:
scores = clf.decision_function(X)
probs = expit(scores)
pdict = pdictify(probs, args.fold)
common.save_pdict_for_tag(args.tag, pdict, args.fold)
def logistic(x, prime=0):
    if prime == 0:
        # sps.expit evaluates 1 / (1 + exp(-x)) in a numerically stable, piecewise way
        return sps.expit(x)
elif prime == 1:
return logistic(x) * (1.0 - logistic(x))
else:
raise NotImplementedError('%d order derivative not implemented.' % int(prime))
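A quick numerical check of the prime == 1 branch at an arbitrary test point (the derivative of the logistic function is expit(x) * (1 - expit(x))):

import numpy as np
from scipy.special import expit

x, h = 0.7, 1e-6
analytic = expit(x) * (1.0 - expit(x))
numeric = (expit(x + h) - expit(x - h)) / (2 * h)
print(np.isclose(analytic, numeric))  # True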
def train_cbow_pair_softmax(model, target, input_word_indices, l1, alpha, learn_vectors=True, learn_hidden=True):
neu1e = zeros(l1.shape)
target_vect = zeros(model.syn1neg.shape[0])
target_vect[target.index] = 1.
l2 = copy(model.syn1neg)
fa = expit(dot(l1, l2.T)) # propagate hidden -> output
ga = (target_vect - fa) * alpha # vector of error gradients multiplied by the learning rate
if learn_hidden:
model.syn1neg += outer(ga, l1) # learn hidden -> output
neu1e += dot(ga, l2) # save error
if learn_vectors:
# learn input -> hidden, here for all words in the window separately
if not model.cbow_mean and input_word_indices:
neu1e /= len(input_word_indices)
for i in input_word_indices:
model.wv.syn0[i] += neu1e * model.syn0_lockf[i]
return neu1e
def score_cbow_labeled_pair(model, targets, l1):
if model.hs:
prob = []
        # FIXME: this loop should be vectorized with numpy
for target in targets:
l2a = model.syn1[target.point]
sgn = (-1.0) ** target.code # ch function, 0-> 1, 1 -> -1
prob.append(prod(expit(sgn * dot(l1, l2a.T))))
# Softmax
else:
def exp_dot(x):
return exp(dot(l1, x.T))
prob_num = exp_dot(model.syn1neg[[t.index for t in targets]])
prob_den = np_sum(apply_along_axis(exp_dot, 1, model.syn1neg))
prob = prob_num / prob_den
return prob
def temp_log_loss(w, X, Y, alpha):
n_classes = Y.shape[1]
w = w.reshape(n_classes, -1)
intercept = w[:, -1]
w = w[:, :-1]
z = safe_sparse_dot(X, w.T) + intercept
    denom = expit(z)
    denom = denom.sum(axis=1).reshape((denom.shape[0], -1))
    p = log_logistic(z)
loss = - (Y * p).sum()
loss += np.log(denom).sum()
loss += 0.5 * alpha * squared_norm(w)
return loss
def logistic_regression(x, t, w, eps=1e-2, max_iter=int(1e3)):
N = x.shape[1]
Phi = np.vstack([np.ones(N), phi(x)]).T
for k in range(max_iter):
y = expit(Phi.dot(w))
R = np.diag(np.ones(N) * (y * (1 - y)))
H = Phi.T.dot(R).dot(Phi)
g = Phi.T.dot(y - t)
w_new = w - linalg.solve(H, g)
diff = linalg.norm(w_new - w) / linalg.norm(w)
if (diff < eps):
break
w = w_new
print('{0:5d} {1:10.6f}'.format(k, diff))
return w
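The loop above is Newton's method (IRLS) on the logistic log-likelihood: R holds the variances y(1 - y), H = Phi' R Phi is the Hessian, and g is the gradient. A self-contained sketch on synthetic 1-D data, with the basis expansion phi replaced by the identity purely for illustration:

import numpy as np
from numpy import linalg
from scipy.special import expit

rng = np.random.default_rng(1)
N = 200
x = rng.uniform(-3, 3, size=N)
t = (rng.uniform(size=N) < expit(2.0 * x - 1.0)).astype(float)  # labels from a known model

Phi = np.vstack([np.ones(N), x]).T   # intercept plus identity "basis"
w = np.zeros(2)
for _ in range(20):
    y = expit(Phi.dot(w))
    R = np.diag(y * (1 - y))
    H = Phi.T.dot(R).dot(Phi)        # Hessian of the negative log-likelihood
    g = Phi.T.dot(y - t)             # gradient
    w_new = w - linalg.solve(H, g)
    if linalg.norm(w_new - w) / max(linalg.norm(w), 1e-12) < 1e-6:
        w = w_new
        break
    w = w_new
print(w)  # roughly recovers the generating coefficients (-1, 2)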
def transform(self, y):
yexpit = expit(self.scale * y)
return yexpit
def make_image_data():
"""Make some simple data."""
N = 100
M = 3
# N 28x28 RGB float images
x = expit(RAND.randn(N, 28, 28, 3)).astype(np.float32)
w = np.linspace(-2.5, 2.5, 28*28*3)
    Y = np.dot(x.reshape(-1, 28*28*3), w)[:, np.newaxis] + RAND.randn(N, 1)  # keep targets as an (N, 1) column so the noise adds element-wise
X = tf.tile(tf.expand_dims(x, 0), [M, 1, 1, 1, 1])
return x, Y, X
def predict(self, new_df):
""" Use estimated coefficients to make predictions on new data
Args:
new_df (DataFrame). DataFrame to make predictions on.
Returns:
array-like. Predictions on the response scale, i.e. probabilities
"""
return expit(super(LogisticRegression, self).predict(new_df))
def l2_clogistic_llh(X, Y, alpha, beta, penalty_matrix, offset):
""" Penalized log likelihood function for proportional odds cumulative logit model
Args:
X : array_like. design matrix
Y : array_like. response matrix
alpha : array_like. intercepts.\
must have shape == one less than the number of columns of `Y`
beta : array_like. parameters.\
must have shape == number of columns of X
penalty_matrix : array_like. Regularization matrix
offset : array_like, optional. Defaults to 0
Returns:
scalar : penalized loglikelihood
"""
offset = 0.0 if offset is None else offset
obj = 0.0
J = Y.shape[1]
Xb = dot(X, beta) + offset
for j in range(J):
if j == 0:
obj += dot(np.log(expit(alpha[j] + Xb)), Y[:, j])
elif j == J - 1:
obj += dot(np.log(1 - expit(alpha[j - 1] + Xb)), Y[:, j])
else:
obj += dot(np.log(expit(alpha[j] + Xb) - expit(alpha[j - 1] + Xb)), Y[:, j])
obj -= 0.5 * dot(beta, dot(penalty_matrix, beta))
return -np.inf if np.isnan(obj) else obj
def _l2_clogistic_gradient_IL(X, alpha, beta, offset=None, **kwargs):
""" Helper function for calculating the cumulative logistic gradient. \
The inverse logit of alpha[j + X*beta] is \
ubiquitous in gradient and Hessian calculations \
so it's more efficient to calculate it once and \
pass it around as a parameter than to recompute it every time
Args:
X : array_like. design matrix
alpha : array_like. intercepts. must have shape == one less than the number of columns of `Y`
beta : array_like. parameters. must have shape == number of columns of X
offset : array_like, optional. Defaults to 0
n : int, optional.\
You must specify the number of rows if there are no main effects
Returns:
array_like. n x J-1 matrix where entry i,j is the inverse logit of (alpha[j] + X[i, :] * beta)
"""
J = len(alpha) + 1
if X is None:
n = kwargs.get("n")
else:
n = X.shape[0]
if X is None or beta is None:
Xb = 0.
else:
Xb = dot(X, beta) + (0 if offset is None else offset)
IL = np.zeros((n, J - 1))
for j in range(J - 1):
IL[:, j] = expit(alpha[j] + Xb)
return IL
def _grad_mu(distr, z, eta):
"""Derivative of the non-linearity."""
if distr in ['softplus', 'gamma']:
grad_mu = expit(z)
elif distr == 'poisson':
grad_mu = z.copy()
grad_mu[z > eta] = np.ones_like(z)[z > eta] * np.exp(eta)
grad_mu[z <= eta] = np.exp(z[z <= eta])
elif distr == 'gaussian':
grad_mu = np.ones_like(z)
elif distr == 'binomial':
grad_mu = expit(z) * (1 - expit(z))
    elif distr == 'probit':
grad_mu = norm.pdf(z)
return grad_mu
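The softplus/gamma branch relies on the identity d/dz log(1 + e^z) = expit(z), which ties _grad_mu back to _mu above; a short numerical confirmation:

import numpy as np
from scipy.special import expit

z = np.linspace(-4, 4, 9)
h = 1e-6
numeric = (np.log1p(np.exp(z + h)) - np.log1p(np.exp(z - h))) / (2 * h)
print(np.allclose(numeric, expit(z)))  # True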
def neural_network_cost_gradient(parameters, input, output):
"""
3-layer network cost and gradient function
:param parameters: pair of (W1, W2)
:param input: input vector
:param output: index to correct label
:return: cross entropy cost and gradient
"""
W1, W2 = parameters
input = input.reshape(-1, 1)
hidden_layer = expit(W1.dot(input))
inside_softmax = W2.dot(hidden_layer)
# TODO: allow softmax to normalize column vector
prediction = softmax(inside_softmax.reshape(-1)).reshape(-1, 1)
cost = -np.sum(np.log(prediction[output]))
one_hot = np.zeros_like(prediction)
one_hot[output] = 1
delta = prediction - one_hot
gradient_W2 = delta.dot(hidden_layer.T)
gradient_W1 = sigmoid_gradient(hidden_layer) * W2.T.dot(delta).dot(input.T)
gradient = [gradient_W1, gradient_W2]
return cost, gradient
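The backward pass applies the chain rule: delta = softmax - one_hot at the output, gradient_W2 = delta . hidden', and the hidden error is scaled by the sigmoid derivative before forming gradient_W1. A self-contained finite-difference spot check of both gradients, with softmax and the sigmoid derivative written out inline (the shapes and seed are arbitrary and not tied to the module above):

import numpy as np
from scipy.special import expit

rng = np.random.default_rng(3)
W1 = rng.normal(scale=0.1, size=(4, 3))   # hidden x input
W2 = rng.normal(scale=0.1, size=(5, 4))   # output x hidden
x = rng.normal(size=3)
label = 2

def cost(W1, W2):
    h = expit(W1.dot(x.reshape(-1, 1)))
    s = W2.dot(h).reshape(-1)
    p = np.exp(s - s.max())
    p /= p.sum()                          # softmax
    return -np.log(p[label])

# Analytic gradients, mirroring the function above
h = expit(W1.dot(x.reshape(-1, 1)))
s = W2.dot(h).reshape(-1)
p = np.exp(s - s.max())
p /= p.sum()
delta = p.reshape(-1, 1)
delta[label] -= 1
gW2 = delta.dot(h.T)
gW1 = (h * (1 - h)) * W2.T.dot(delta).dot(x.reshape(1, -1))

# Finite-difference check on one entry of each weight matrix
eps = 1e-6
E1 = np.zeros_like(W1); E1[1, 2] = eps
E2 = np.zeros_like(W2); E2[3, 0] = eps
print(np.isclose((cost(W1 + E1, W2) - cost(W1 - E1, W2)) / (2 * eps), gW1[1, 2]))  # True
print(np.isclose((cost(W1, W2 + E2) - cost(W1, W2 - E2)) / (2 * eps), gW2[3, 0]))  # True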
def test_sigmoid(self):
x = np.array([[1, 2], [-1, -2]])
f = expit(x)
g = sigmoid_gradient(f)
expected = np.array([[0.73105858, 0.88079708],
[0.26894142, 0.11920292]])
self.assertNumpyEqual(expected, f)
expected = np.array([[0.19661193, 0.10499359],
[0.19661193, 0.10499359]])
self.assertNumpyEqual(expected, g)
def test_logistic_regression(self):
input = np.random.uniform(-10.0, 10.0, size=10)
output = np.random.randint(0, 2)
def logistic_regression_wrapper(parameters):
return logistic_regression_cost_gradient(parameters, input, output)
initial_parameters = np.random.normal(scale=1e-5, size=10)
result = gradient_check(logistic_regression_wrapper, initial_parameters)
self.assertEqual([], result)
# Train logistic regression and see if it predicts correct label
final_parameters, cost_history = gradient_descent(logistic_regression_wrapper, initial_parameters, 100)
prediction = expit(np.dot(input, final_parameters)) > 0.5
self.assertEqual(output, prediction)
def test_gradient_check_sigmoid(self):
def sigmoid_check(x):
return expit(x), sigmoid_gradient(expit(x))
x = np.array(0.0)
result = gradient_check(sigmoid_check, x)
self.assertEqual([], result)
Source: feature_importance.py from the instacart-basket-prediction project (author: colinmorris)
def b_and_a(feat, val):
before, after = alt(feat, val)
    print('Setting {} to {}'.format(feat, val))
    delta = after - before
    print('Logits: {:.2f} -> {:.2f} ({}{:.2f})'.format(
        before, after, ('+' if delta >= 0 else ''), delta))
    print('Prob: {:.3f} -> {:.3f}'.format(expit(before), expit(after)))
Source: precompute_probs.py from the instacart-basket-prediction project (author: colinmorris)
def main():
parser = argparse.ArgumentParser()
parser.add_argument('tags', nargs='+')
parser.add_argument('--dest-tag', default='stacked',
help='Tag for generated pdict (default: "stacked")')
parser.add_argument('--fold', default='test')
args = parser.parse_args()
metavec = load_metavectors(args.fold)
#clf = train.load_model()
clf = joblib.load('model.pkl')
with time_me('Vectorized fold {}'.format(args.fold)):
# TODO: this fn is not a thing?
X, y = train.vectorize_fold(args.fold, args.tags, metavec)
if hasattr(clf, 'predict_proba'):
probs = clf.predict_proba(X)
# returns an array of shape (n, 2), where each len-2 subarray
# has the probability of the negative and positive classes. which is silly.
probs = probs[:,1]
else:
scores = clf.decision_function(X)
probs = expit(scores)
pdict = pdictify(probs, metavec)
common.save_pdict_for_tag(args.dest_tag, pdict, args.fold)
def delta(u, v):
""" cosine ° sigmoid
>>> delta([0.2], [0.3])
0.5
>>> delta([0.3], [0.2])
0.5
>>> delta([0.1,0.9], [-0.9,0.1]) == delta([-0.9,0.1], [0.1,0.9])
True
"""
# TODO scale with a and c
return expit(cosine(u, v))
def delta(X, Y, n_jobs=-1, a=1, c=0):
"""Pairwise delta function: cosine and sigmoid
:X: TODO
:returns: TODO
"""
D = pairwise_distances(X, Y, metric="cosine", n_jobs=n_jobs)
if c != 0:
D -= c
if a != 1:
D *= a
D = expit(D)
return D
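Note that pairwise_distances(..., metric="cosine") returns cosine distances (1 minus similarity), so with the defaults a=1 and c=0 this is just the element-wise version of the delta above; a quick equivalence check on small made-up vectors:

import numpy as np
from scipy.special import expit
from scipy.spatial.distance import cosine
from sklearn.metrics import pairwise_distances

X = np.array([[0.1, 0.9], [1.0, 0.0]])
Y = np.array([[-0.9, 0.1], [0.5, 0.5]])
D = expit(pairwise_distances(X, Y, metric="cosine"))
elementwise = np.array([[expit(cosine(x, y)) for y in Y] for x in X])
print(np.allclose(D, elementwise))  # True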
def hypothesisFunc(theta, x):
    hx = expit(np.dot(x, theta))
    return hx
def logistic(X):
return logistic_sigmoid(X, out=X)
def _activate(self, X, layer):
if self.activationFuns[layer]=='log':
return expit(X)
elif self.activationFuns[layer]=='tan':
return 2 / (1 + np.exp(-2*X)) - 1
else:
return X
def H(initTheta,X):
# X1 = FeatureScaling(X)
hypothesis = expit(np.dot(X,initTheta))
return hypothesis
def logistic_loss(w, X, Y, alpha):
"""
Implementation of the logistic loss function when Y is a probability
distribution.
    loss = -SUM_i SUM_k y_ik * log(P[yi == k]) + 0.5 * alpha * ||w||^2
"""
n_classes = Y.shape[1]
n_features = X.shape[1]
intercept = 0
if n_classes > 2:
fit_intercept = w.size == (n_classes * (n_features + 1))
w = w.reshape(n_classes, -1)
if fit_intercept:
intercept = w[:, -1]
w = w[:, :-1]
else:
fit_intercept = w.size == (n_features + 1)
if fit_intercept:
intercept = w[-1]
w = w[:-1]
z = safe_sparse_dot(X, w.T) + intercept
if n_classes == 2:
# in the binary case, simply compute the logistic function
p = np.vstack([log_logistic(-z), log_logistic(z)]).T
else:
# compute the logistic function for each class and normalize
denom = expit(z)
denom = denom.sum(axis=1).reshape((denom.shape[0], -1))
p = log_logistic(z)
loss = - (Y * p).sum()
loss += np.log(denom).sum() # Y.sum() = 1
loss += 0.5 * alpha * squared_norm(w)
return loss
loss = - (Y * p).sum() + 0.5 * alpha * squared_norm(w)
return loss
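In the binary branch, log_logistic(z) equals log(expit(z)) and log_logistic(-z) equals log(1 - expit(z)), so the loss is the usual cross-entropy plus the ridge term; a self-contained sketch of that equivalence using expit directly instead of sklearn's helpers, on made-up data:

import numpy as np
from scipy.special import expit

rng = np.random.default_rng(2)
X = rng.normal(size=(6, 3))
w = rng.normal(size=3)
y = rng.integers(0, 2, size=6)
Y = np.column_stack([1 - y, y]).astype(float)  # rows are [P(class 0), P(class 1)]
alpha = 1.0

z = X.dot(w)
p = np.column_stack([np.log(1 - expit(z)), np.log(expit(z))])  # log-probability of each class
loss = -(Y * p).sum() + 0.5 * alpha * np.dot(w, w)

# The same quantity written as a standard cross-entropy over hard labels
ce = -(y * np.log(expit(z)) + (1 - y) * np.log(1 - expit(z))).sum() + 0.5 * alpha * np.dot(w, w)
print(np.allclose(loss, ce))  # True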