def predict(self, X):
"""
Predict data using the ``centroids_`` of subclusters.
Avoid computation of the row norms of X.
Parameters
----------
X : {array-like, sparse matrix}, shape (n_samples, n_features)
Input data.
Returns
-------
labels : ndarray, shape (n_samples,)
Labelled data.
"""
X = check_array(X, accept_sparse='csr')
self._check_fit(X)
reduced_distance = safe_sparse_dot(X, self.subcluster_centers_.T)
reduced_distance *= -2
reduced_distance += self._subcluster_norms
return self.subcluster_labels_[np.argmin(reduced_distance, axis=1)]
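A minimal standalone sketch (not from the original project, and assuming _subcluster_norms holds the squared row norms of the centers) of why dropping the row norms of X is safe: the argmin of -2 * X @ C.T + ||c||**2 gives the same labels as the full squared Euclidean distance, because ||x||**2 is constant per row.
import numpy as np

rng = np.random.RandomState(0)
X = rng.rand(6, 3)                            # stand-in input data
C = rng.rand(4, 3)                            # stand-in for subcluster_centers_
center_norms = np.einsum('ij,ij->i', C, C)    # stand-in for _subcluster_norms (squared norms)
reduced = -2 * (X @ C.T) + center_norms
full = ((X[:, None, :] - C[None, :, :]) ** 2).sum(axis=-1)
assert np.array_equal(reduced.argmin(axis=1), full.argmin(axis=1))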
def _fit_owl_fista(X, y, w, loss, max_iter=500, max_linesearch=20, eta=2.0,
tol=1e-3, verbose=0):
# smooth part of the objective: the data-fitting loss and, optionally, its gradient
def sfunc(coef, grad=False):
y_scores = safe_sparse_dot(X, coef)
if grad:
obj, lp = loss(y, y_scores, return_derivative=True)
grad = safe_sparse_dot(X.T, lp)
return obj, grad
else:
return loss(y, y_scores)
def nsfunc(coef, L):
return prox_owl(coef, w / L)
coef = np.zeros(X.shape[1])
return fista(sfunc, nsfunc, coef, max_iter, max_linesearch,
eta, tol, verbose)
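A minimal squared-error loss with the calling convention that sfunc above expects (read off from the calls in the snippet; the project's actual loss objects may differ):
import numpy as np

def squared_loss(y, y_scores, return_derivative=False):
    # 0.5 * ||y_scores - y||^2 and, optionally, its derivative w.r.t. y_scores
    residual = y_scores - y
    obj = 0.5 * np.dot(residual, residual)
    if return_derivative:
        return obj, residual
    return obj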
def _intercept_dot(w, X, y):
"""Computes y * np.dot(X, w).
It takes into consideration if the intercept should be fit or not.
Parameters
----------
w : ndarray, shape (n_features,) or (n_features + 1,)
Coefficient vector.
X : {array-like, sparse matrix}, shape (n_samples, n_features)
Training data.
y : ndarray, shape (n_samples,)
Array of labels.
Returns
-------
w : ndarray, shape (n_features,)
Coefficient vector without the intercept weight.
c : float
The intercept.
yz : ndarray, shape (n_samples,)
y * (np.dot(X, w) + c) for each sample.
"""
c = 0.
if w.size == X.shape[1] + 1:
c = w[-1]
w = w[:-1]
z = safe_sparse_dot(X, w) + c
return w, c, y * z
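A small usage sketch of _intercept_dot on toy data (assuming the helper above is importable): the returned yz holds the per-sample margins y * (X @ w + c), positive for correctly classified samples.
import numpy as np

X = np.array([[1.0, 0.0], [0.0, 1.0]])
y = np.array([1.0, -1.0])
w = np.array([2.0, -3.0, 0.5])        # last entry acts as the intercept
w_, c, yz = _intercept_dot(w, X, y)
# w_ = [2., -3.], c = 0.5, yz = y * (X @ w_ + c) = [2.5, 2.5]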
def polynomial_kernel(self, x, y, gamma):
"""
Custom polynomial kernel function: similarity of vectors under a degree-2 polynomial kernel
:param x: array of input vectors
:param y: array of input vectors
:param gamma: gamma
:returns:
- pk: the polynomial kernel Gram matrix, (gamma * <x, y> + c) ** degree
"""
c = 1
degree = 2
pk = ssd(x, y.T, dense_output = True)
pk *= gamma
pk += c
pk **= degree
return pk
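For a quick sanity check (an illustration, not part of the snippet), the result matches sklearn's polynomial_kernel with degree=2 and coef0=1:
import numpy as np
from sklearn.metrics.pairwise import polynomial_kernel as sk_poly

rng = np.random.RandomState(0)
x, y, gamma = rng.rand(4, 3), rng.rand(5, 3), 0.7
pk = (gamma * (x @ y.T) + 1) ** 2     # what the method above computes
assert np.allclose(pk, sk_poly(x, y, degree=2, gamma=gamma, coef0=1))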
def sigmoid_kernel(self, x, y, gamma):
"""
Custom sigmoid kernel function, similarities of vectors in a sigmoid kernel matrix
:param x: array of input vectors
:param y: array of input vectors
:param gamma: gamma
:returns:
- sk: the sigmoid kernel Gram matrix, tanh(gamma * <x, y> + c)
"""
c = 1
sk = ssd(x, y.T, dense_output = True)
sk *= gamma
sk += c
sk = np.array(sk, dtype='float64')
np.tanh(sk, out=sk)
return sk
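Likewise, a hedged sanity check (illustration only) against sklearn's sigmoid_kernel, which computes tanh(gamma * <x, y> + coef0), the same quantity the method above produces with c = 1:
import numpy as np
from sklearn.metrics.pairwise import sigmoid_kernel as sk_sigmoid

rng = np.random.RandomState(0)
x, y, gamma = rng.rand(4, 3), rng.rand(5, 3), 0.3
sk = np.tanh(gamma * (x @ y.T) + 1)
assert np.allclose(sk, sk_sigmoid(x, y, gamma=gamma, coef0=1))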
def _compute_input_activations(self, X):
"""Compute input activations given X"""
n_samples = X.shape[0]
mlp_acts = np.zeros((n_samples, self.n_hidden))
if (self._use_mlp_input):
b = self.components_['biases']
w = self.components_['weights']
mlp_acts = self.alpha * (safe_sparse_dot(X, w) + b)
rbf_acts = np.zeros((n_samples, self.n_hidden))
if (self._use_rbf_input):
radii = self.components_['radii']
centers = self.components_['centers']
scale = self.rbf_width * (1.0 - self.alpha)
rbf_acts = scale * cdist(X, centers)/radii
self.input_activations_ = mlp_acts + rbf_acts
def _decision_function(self, X):
"""Predict using the linear model
Parameters
----------
X : {array-like, sparse matrix}, shape (n_samples, n_features)
Returns
-------
array, shape (n_samples,)
Predicted target values per element in X.
"""
check_is_fitted(self, ["t_", "coef_", "intercept_"], all_or_any=all)
X = check_array(X, accept_sparse='csr')
scores = safe_sparse_dot(X, self.coef_.T,
dense_output=True) + self.intercept_
return scores.ravel()
def test_sparse_decision_function():
# Test decision_function.
# Sanity check: test that decision_function implemented in Python
# returns the same as the one in libsvm.
# multi class:
svc = svm.SVC(kernel='linear', C=0.1, decision_function_shape='ovo')
clf = svc.fit(iris.data, iris.target)
dec = safe_sparse_dot(iris.data, clf.coef_.T) + clf.intercept_
assert_array_almost_equal(dec, clf.decision_function(iris.data))
# binary:
clf.fit(X, Y)
dec = np.dot(X, clf.coef_.T) + clf.intercept_
prediction = clf.predict(X)
assert_array_almost_equal(dec.ravel(), clf.decision_function(X))
assert_array_almost_equal(
prediction,
clf.classes_[(clf.decision_function(X) > 0).astype(int).ravel()])
expected = np.array([-1., -0.66, -1., 0.66, 1., 1.])
assert_array_almost_equal(clf.decision_function(X), expected, 2)
def fgrad(we, X, y, l1, l2):
nsamples, nfactors = X.shape
w0 = we[0]
w = we[1:(nfactors+1)] - we[(nfactors+1):]
yz = y * (safe_sparse_dot(X, w) + w0)
f = - np.sum(log_logistic(yz)) + l1 * np.sum(we[1:]) + 0.5 * l2 * np.dot(w, w)
e = (expit(yz) - 1) * y
g = safe_sparse_dot(X.T, e) + l2 * w
g0 = np.sum(e)
grad = np.concatenate([g, -g]) + l1
grad = np.insert(grad, 0, g0)
return f, grad
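One way to validate the analytic gradient above (a sketch, assuming fgrad and its dependencies — safe_sparse_dot, log_logistic, expit — are importable as in the snippet) is scipy's finite-difference check:
import numpy as np
from scipy.optimize import check_grad

rng = np.random.RandomState(0)
X = rng.rand(20, 5)
y = np.sign(rng.rand(20) - 0.5)
we0 = rng.rand(1 + 2 * X.shape[1]) * 0.01    # [intercept, w_plus, w_minus]
err = check_grad(lambda we: fgrad(we, X, y, 0.1, 0.1)[0],
                 lambda we: fgrad(we, X, y, 0.1, 0.1)[1],
                 we0)
# err should be tiny (~1e-6) if the analytic gradient matches finite differences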
def score(self, user, candidates, context):
# i_mat is (n_item_context, n_item) for all possible items
# extract only target items
i_mat = self.i_mat[:, candidates]
n_target = len(candidates)
# u_mat will be (n_user_context, n_target) for the target user
u_vec = np.concatenate((user.feature, context))
u_vec = np.array([u_vec]).T
u_mat = sp.csr_matrix(np.repeat(u_vec, n_target, axis=1))
# stack them into (p, n_item) matrix
Y = sp.vstack((u_mat, i_mat))
Y = self.proj.reduce(Y)
Y = sp.csr_matrix(preprocessing.normalize(Y, norm='l2', axis=0))
X = np.identity(self.k) - np.dot(self.U_r, self.U_r.T)
A = safe_sparse_dot(X, Y, dense_output=True)
return ln.norm(A, axis=0, ord=2)
def _mean_hiddens(self, v, temperature=1.0):
"""Computes the probabilities P(h=1|v).
Parameters
----------
v : array-like, shape (n_samples, n_features)
Values of the visible layer.
Returns
-------
h : array-like, shape (n_samples, n_components)
Corresponding mean field values for the hidden layer.
"""
p = safe_sparse_dot(v, self.components_.T/temperature)
p += self.intercept_hidden_/(min(1.0, temperature) if BIASED_PRIOR else temperature)
return expit(p, out=p)
def _free_energy(self, v):
"""Computes the free energy F(v) = - log sum_h exp(-E(v,h)).
Parameters
----------
v : array-like, shape (n_samples, n_features)
Values of the visible layer.
Returns
-------
free_energy : array-like, shape (n_samples,)
The value of the free energy.
"""
return (- safe_sparse_dot(v, self.intercept_visible_)
- np.logaddexp(0, safe_sparse_dot(v, self.components_.T)
+ self.intercept_hidden_).sum(axis=1))
def _fit(self, v_pos):
"""Inner fit for one mini-batch.
Adjust the parameters to maximize the likelihood of v using
Stochastic Maximum Likelihood (SML).
v_pos : array-like, shape (n_samples, n_features)
The data to use for training.
"""
h_pos = self._mean_hiddens(v_pos)
# TODO: Worth trying with visible probabilities rather than binary states.
# PG: 'it is common to use p_i instead of sampling a binary value' ... 'it reduces
# sampling noise thus allowing faster learning. There is some evidence that it leads
# to slightly worse density models'
# I'm confounded by the fact that we seem to get more effective models WITHOUT
# softmax visible units. The only explanation I can think of is that it's like
# a pseudo-version of using visible probabilities. Without softmax, v_neg
# can have multiple 1s per one-hot vector, which maybe somehow accelerates learning?
# Need to think about this some more.
v_neg = self._sample_visibles(self.h_samples_)
h_neg = self._mean_hiddens(v_neg)
lr = float(self.learning_rate) / v_pos.shape[0]
update = safe_sparse_dot(v_pos.T, h_pos, dense_output=True).T
update -= np.dot(h_neg.T, v_neg) / self.fantasy_to_batch
# L2 weight penalty
update -= self.components_ * self.weight_cost
self.components_ += lr * update
self.intercept_hidden_ += lr * (h_pos.sum(axis=0) - h_neg.sum(axis=0)/self.fantasy_to_batch)
self.intercept_visible_ += lr * (np.asarray(
v_pos.sum(axis=0)).squeeze() -
v_neg.sum(axis=0)/self.fantasy_to_batch)
h_neg[self.rng_.uniform(size=h_neg.shape) < h_neg] = 1.0 # sample binomial
self.h_samples_ = np.floor(h_neg, h_neg)
def norms_svec_resampling(X, orthogonal_basis, rank, num_samples=1000):
"""
Resampling procedure described in AJIVE paper for Wedin bound
Parameters
----------
X: the observed data
orthogonal_basis: basis vectors for the orthogonal complement
of the score space
rank: number of columns to resample
num_samples: how many resamples
Output
------
an array of the resampled norms
"""
# TODO: rename some arguments e.g. orthogonal_basis -- this is not really
# a basis, also resampled_projection
resampled_norms = [0]*num_samples
for s in range(num_samples):
# sample columns from orthogonal basis
sampled_col_index = np.random.choice(orthogonal_basis.shape[1], size=rank, replace=True)
resampled_basis = orthogonal_basis[:, sampled_col_index]
# ^ this is V* from AJIVE p12
# project observed data
# resampled_projection = np.dot(X, resampled_basis)
resampled_projection = safe_sparse_dot(X, resampled_basis)
# compute resampled operator L2 norm
resampled_norms[s] = np.linalg.norm(resampled_projection, ord=2)
return resampled_norms
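A toy call (hedged: random data and a QR-orthonormalized stand-in for the orthogonal basis), just to illustrate the expected shapes:
import numpy as np

rng = np.random.RandomState(0)
X = rng.rand(50, 20)
Q, _ = np.linalg.qr(rng.rand(20, 10))    # 10 orthonormal columns in feature space
norms = norms_svec_resampling(X, Q, rank=3, num_samples=200)
# norms is a list of 200 resampled operator (spectral) norms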
def transform(self, X, y=None):
return safe_sparse_dot(self.vect_.transform(X), self.embeds)
def fit_transform(self, X, y=None, **fit_params):
self.vect_ = TfidfVectorizer(vocabulary=self.embed_vocab,
analyzer=_lower_words_getter,
norm='l1',
use_idf=False)
return safe_sparse_dot(self.vect_.fit_transform(X), self.embeds)
def joint_feature(self, x, y):
if isinstance(y, DocLabel):
Y_prop, Y_link, compat, second_order = self._marg_rounded(x, y)
else:
Y_prop, Y_link, compat, second_order = self._marg_fractional(x, y)
prop_acc = safe_sparse_dot(Y_prop.T, x.X_prop) # node_cls * node_feats
link_acc = safe_sparse_dot(Y_link.T, x.X_link) # link_cls * link_feats
f_sec_ord = []
if len(second_order):
second_order = second_order.reshape(-1, len(x.second_order))
if self.coparents:
f_sec_ord.append(safe_sparse_dot(second_order[0], x.X_sec_ord))
second_order = second_order[1:]
if self.grandparents:
f_sec_ord.append(safe_sparse_dot(second_order[0], x.X_sec_ord))
second_order = second_order[1:]
if self.siblings:
f_sec_ord.append(safe_sparse_dot(second_order[0], x.X_sec_ord))
elif self.n_second_order_factors_:
# document has no second order factors so the joint feature
# must be filled with zeros manually
f_sec_ord = [np.zeros(self.n_second_order_features_)
for _ in range(self.n_second_order_factors_)]
jf = np.concatenate([prop_acc.ravel(), link_acc.ravel(),
compat.ravel()] + f_sec_ord)
return jf
def transform_lsi(self, X):
""" LSI transform, normalized by the inverse of the eigen values"""
X = check_array(X, accept_sparse='csr')
return safe_sparse_dot(X, self.components_.T).dot(
np.diag(1./self.singular_values_[:self.n_components]))
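As an analogy (not the original class), the same rescaled projection can be written for a fitted sklearn TruncatedSVD, whose components_ and singular_values_ play the roles used above:
import numpy as np
from sklearn.decomposition import TruncatedSVD

rng = np.random.RandomState(0)
X = rng.rand(20, 10)
svd = TruncatedSVD(n_components=5).fit(X)
# project onto the components, then divide by the singular values
X_lsi = (X @ svd.components_.T) / svd.singular_values_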
def update_X(X, mu, k=6):
#U, S, VT = svdp(X, k=k)
U, S, VT = svds(X, k=k, which='LM')
P = np.c_[np.ones((k, 1)), 1-S, 1./2./mu-S]
sigma_star = np.zeros(k)
for t in range(k):
p = P[t, :]
delta = p[1]**2 - 4 * p[0] * p[2]
if delta <= 0:
sigma_star[t] = 0.
else:
solution = np.roots(p)
solution = solution.tolist()
solution.sort(key=abs)
solution = np.array(solution)
if solution[0] * solution[1] <= 0:
sigma_star[t] = solution[1]
elif solution[1] < 0:
sigma_star[t] = 0.
else:
f = np.log(1 + solution[1]) + mu * (solution[1] - S[t])**2
if f > mu * S[t]**2:
sigma_star[t] = 0.
else:
sigma_star[t] = solution[1]
sigma_star = sp.csr_matrix(np.diag(sigma_star))
sigma_star = safe_sparse_dot(safe_sparse_dot(U, sigma_star), VT)
sigma_star[abs(sigma_star)<1e-10] = 0
return sp.lil_matrix(sigma_star)
def _decision_function(self, X):
return safe_sparse_dot(X, self.coef_)
def _get_output(self, X):
y_pred = _poly_predict(X, self.P_[0, :, :], self.lams_, kernel='anova',
degree=self.degree)
if self.fit_linear:
y_pred += safe_sparse_dot(X, self.w_)
if self.fit_lower == 'explicit' and self.degree == 3:
# degree cannot currently be > 3
y_pred += _poly_predict(X, self.P_[1, :, :], self.lams_,
kernel='anova', degree=2)
return y_pred
def _D(X, P, degree=2):
"""The "replacement" part of the homogeneous polynomial kernel.
D[i, j] = sum_k [(X_ik * P_jk) ** degree]
"""
return safe_sparse_dot(safe_power(X, degree), P.T ** degree)
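A quick numeric check of that formula in plain numpy (assuming safe_power is an elementwise power; degree fixed to 2 here):
import numpy as np

rng = np.random.RandomState(0)
X, P = rng.rand(3, 4), rng.rand(2, 4)
direct = (X ** 2) @ (P.T ** 2)           # dense equivalent of _D(X, P, degree=2)
brute = np.array([[np.sum((X[i] * P[j]) ** 2) for j in range(P.shape[0])]
                  for i in range(X.shape[0])])
assert np.allclose(direct, brute)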
# Source: extreme-learning-machines / extreme_learning_machines.py (author: IssamLaradji)
def _compute_hidden_activations(self, X):
"""Compute the hidden activations."""
hidden_activations = safe_sparse_dot(X, self.coef_hidden_)
hidden_activations += self.intercept_hidden_
# Apply the activation method
activation = ACTIVATIONS[self.activation]
hidden_activations = activation(hidden_activations)
return hidden_activations
# Source: extreme-learning-machines / extreme_learning_machines.py (author: IssamLaradji)
def _decision_scores(self, X):
"""Predict using the ELM model
Parameters
----------
X : {array-like, sparse matrix}, shape (n_samples, n_features)
The input data.
Returns
-------
y_pred : array-like, shape (n_samples,) or (n_samples, n_outputs)
The predicted values.
"""
X = check_array(X, accept_sparse=['csr', 'csc', 'coo'])
if self.batch_size is None:
hidden_activations = self._compute_hidden_activations(X)
y_pred = safe_sparse_dot(hidden_activations, self.coef_output_)
else:
n_samples = X.shape[0]
batches = gen_batches(n_samples, self.batch_size)
y_pred = np.zeros((n_samples, self.n_outputs_))
for batch in batches:
h_batch = self._compute_hidden_activations(X[batch])
y_pred[batch] = safe_sparse_dot(h_batch, self.coef_output_)
return y_pred
def linear_kernel_matrix(self, x, y):
"""
Custom inner product. The returned matrix is a linear kernel Gram matrix.
:param x: your dataset
:param y: another dataset (e.g. the transpose of x, so that x @ y gives the Gram matrix)
"""
return ssd(x, y, dense_output=True)
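For reference (an illustration only), with y passed as the transpose of x this reduces to sklearn's linear_kernel:
import numpy as np
from sklearn.metrics.pairwise import linear_kernel
from sklearn.utils.extmath import safe_sparse_dot as ssd

x = np.random.RandomState(0).rand(5, 3)
assert np.allclose(ssd(x, x.T, dense_output=True), linear_kernel(x, x))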
def tf_map(real, imag):
sqrt_i = np.sqrt(ssd(imag.T, imag))
sqrt_r = np.sqrt(ssd(real.T, real))
stft_k = ssd(real.T, imag)
return stft_k / (sqrt_i * sqrt_r)
def SGD(a, lab, Q, reg_param):
iterations = 1
for i in range(7):
for tau in range(len(a)):
wx = ssd(a, Q[tau,:], dense_output = True)
a = a * (1-1/iterations)
if lab[tau] * wx < 1:
a[tau] = lab[tau]/(reg_param * iterations)
iterations += 1
return a
def _forward_pass(self, activations, with_output_activation=True):
"""Perform a forward pass on the network by computing the values
of the neurons in the hidden layers and the output layer.
Parameters
----------
activations: list, length = n_layers - 1
The ith index of the list holds the values of the ith layer.
with_output_activation : bool, default True
If True, the output passes through the output activation
function, which is either the softmax function or the
logistic function
"""
# Iterate over the hidden layers
for i in range(self.n_layers_ - 1):
activations[i + 1] = safe_sparse_dot(activations[i],
self.layers_coef_[i])
activations[i + 1] += self.layers_intercept_[i]
# For the hidden layers
if i + 1 != self.n_layers_ - 1:
hidden_activation = ACTIVATIONS[self.activation]
activations[i + 1] = hidden_activation(activations[i + 1])
# For the last layer
if with_output_activation:
output_activation = ACTIVATIONS[self.out_activation_]
activations[i + 1] = output_activation(activations[i + 1])
return activations
def _compute_cost_grad(self, layer, n_samples, activations, deltas,
coef_grads, intercept_grads):
"""Compute the cost gradient for the layer."""
coef_grads[layer] = safe_sparse_dot(activations[layer].T,
deltas[layer]) / n_samples
coef_grads[layer] += (self.alpha * self.layers_coef_[layer])
intercept_grads[layer] = np.mean(deltas[layer], 0)
return coef_grads, intercept_grads