def make_one_hot(X, onehot_size):
"""
DESCRIPTION:
Make a one-hot version of X
PARAM:
X: 1d numpy with each value in X representing the class of X
onehot_size: length of the one hot vector
RETURN:
2d numpy tensor, with each row been the onehot vector
"""
if onehot_size < 450:
dig_one = np.zeros((onehot_size, onehot_size))
np.fill_diagonal(dig_one, 1)
rX = dig_one[np.asarray(X)]
else:
# for large onehot size, this is faster
rX = np.zeros((len(X), onehot_size))
for i in range(len(X)):
rX[i, X[i]] = 1
return rX
python类fill_diagonal()的实例源码
def test_binary_perplexity_stability():
# Binary perplexity search should be stable.
# The binary_search_perplexity had a bug wherein the P array
# was uninitialized, leading to sporadically failing tests.
k = 10
n_samples = 100
random_state = check_random_state(0)
distances = random_state.randn(n_samples, 2).astype(np.float32)
# Distances shouldn't be negative
distances = np.abs(distances.dot(distances.T))
np.fill_diagonal(distances, 0.0)
last_P = None
neighbors_nn = np.argsort(distances, axis=1)[:, :k].astype(np.int64)
for _ in range(100):
P = _binary_search_perplexity(distances.copy(), neighbors_nn.copy(),
3, verbose=0)
P1 = _joint_probabilities_nn(distances, neighbors_nn, 3, verbose=0)
if last_P is None:
last_P = P
last_P1 = P1
else:
assert_array_almost_equal(P, last_P, decimal=4)
assert_array_almost_equal(P1, last_P1, decimal=4)
def test_gradient():
# Test gradient of Kullback-Leibler divergence.
random_state = check_random_state(0)
n_samples = 50
n_features = 2
n_components = 2
alpha = 1.0
distances = random_state.randn(n_samples, n_features).astype(np.float32)
distances = distances.dot(distances.T)
np.fill_diagonal(distances, 0.0)
X_embedded = random_state.randn(n_samples, n_components)
P = _joint_probabilities(distances, desired_perplexity=25.0,
verbose=0)
fun = lambda params: _kl_divergence(params, P, alpha, n_samples,
n_components)[0]
grad = lambda params: _kl_divergence(params, P, alpha, n_samples,
n_components)[1]
assert_almost_equal(check_grad(fun, grad, X_embedded.ravel()), 0.0,
decimal=5)
def _check_fix_Q(self, fixed_mu=False):
"""
Check the main diagonal of Q and fix it in case it does not corresond
the definition of the rate matrix. Should be run every time when creating
custom GTR model.
"""
# fix Q
self.Pi /= self.Pi.sum() # correct the Pi manually
# NEEDED TO BREAK RATE MATRIX DEGENERACY AND FORCE NP TO RETURN REAL ORTHONORMAL EIGENVECTORS
self.W += self.break_degen + self.break_degen.T
# fix W
np.fill_diagonal(self.W, 0)
Wdiag = -(self.Q).sum(axis=0)/self.Pi
np.fill_diagonal(self.W, Wdiag)
scale_factor = -np.sum(np.diagonal(self.Q)*self.Pi)
self.W /= scale_factor
if not fixed_mu:
self.mu *= scale_factor
if (self.Q.sum(axis=0) < 1e-10).sum() < self.alphabet.shape[0]: # fix failed
print ("Cannot fix the diagonal of the GTR rate matrix. Should be all zero", self.Q.sum(axis=0))
import ipdb; ipdb.set_trace()
raise ArithmeticError("Cannot fix the diagonal of the GTR rate matrix.")
def compute_db_index(matrix, kmeans):
'''
Compute Davies-Bouldin index, a measure of clustering quality.
Faster and possibly more reliable than silhouette score.
'''
(n, m) = matrix.shape
k = kmeans.n_clusters
centers = kmeans.cluster_centers_
labels = kmeans.labels_
centroid_dists = sp_dist.squareform(sp_dist.pdist(centers))
# Avoid divide-by-zero
centroid_dists[np.abs(centroid_dists) < MIN_CENTROID_DIST] = MIN_CENTROID_DIST
wss = np.zeros(k)
counts = np.zeros(k)
for i in xrange(n):
label = labels[i]
# note: this is 2x faster than scipy sqeuclidean
sqdist = np.square(matrix[i,:] - centers[label,:]).sum()
wss[label] += sqdist
counts[label] += 1
# Handle empty clusters
counts[counts == 0] = 1
scatter = np.sqrt(wss / counts)
mixitude = (scatter + scatter[:, np.newaxis]) / centroid_dists
np.fill_diagonal(mixitude, 0.0)
worst_case_mixitude = np.max(mixitude, axis=1)
db_score = worst_case_mixitude.sum() / k
return db_score
def cor2cov(cor, var=None, sd=None, copy=True):
sd = np.sqrt(var) if var is not None else sd
if isinstance(cor, (DiagonalArray, SubdiagonalArray)):
cor = cor.tonumpyarray()
cor = npu.tondim2(cor, copy=copy)
dim = len(var)
assert dim == np.shape(cor)[0] and dim == np.shape(cor)[1]
np.fill_diagonal(cor, 1.)
cor = (sd.T * (sd * cor).T).T
npu.lowertosymmetric(cor, copy=False)
return cor
def find_distance_matrix(self, vector, metric='cosine'):
'''
compute distance matrix between topis using cosine or euclidean
distance (default=cosine distance)
'''
if metric == 'cosine':
distance_matrix = pairwise_distances(vector,
metric='cosine')
# diagonals should be exactly zero, so remove rounding errors
numpy.fill_diagonal(distance_matrix, 0)
if metric == 'euclidean':
distance_matrix = pairwise_distances(vector,
metric='euclidean')
return distance_matrix
def diagonal_mpa(entries, sites):
"""Returns an MPA with ``entries`` on the diagonal and zeros otherwise.
:param numpy.ndarray entries: one-dimensional array
:returns: :class:`~mpnum.mparray.MPArray` with rank ``len(entries)``.
"""
assert sites > 0
if entries.ndim != 1:
raise NotImplementedError("Currently only supports diagonal MPA with "
"one leg per site.")
if sites < 2:
return mp.MPArray.from_array(entries)
ldim = len(entries)
leftmost_ltens = np.eye(ldim).reshape((1, ldim, ldim))
rightmost_ltens = np.diag(entries).reshape((ldim, ldim, 1))
center_ltens = np.zeros((ldim,) * 3)
np.fill_diagonal(center_ltens, 1)
ltens = it.chain((leftmost_ltens,), it.repeat(center_ltens, sites - 2),
(rightmost_ltens,))
return mp.MPArray(LocalTensors(ltens, cform=(sites - 1, sites)))
#########################
# More physical stuff #
#########################
def test_ignore_no_data_ints(self):
arr = np.ones((1, 16, 16), int)
np.fill_diagonal(arr[0], NO_DATA_INT)
tile = Tile(arr, 'INT', NO_DATA_INT)
rdd = BaseTestClass.pysc.parallelize([(self.projected_extent, tile)])
raster_rdd = RasterLayer.from_numpy_rdd(LayerType.SPATIAL, rdd)
value_map = {1: 0}
result = raster_rdd.reclassify(value_map, int, replace_nodata_with=1).to_numpy_rdd().first()[1].cells
self.assertTrue((result == np.identity(16, int)).all())
def test_ignore_no_data_floats(self):
arr = np.ones((1, 4, 4))
np.fill_diagonal(arr[0], float('nan'))
tile = Tile(arr, 'FLOAT', float('nan'))
rdd = BaseTestClass.pysc.parallelize([(self.projected_extent, tile)])
raster_rdd = RasterLayer.from_numpy_rdd(LayerType.SPATIAL, rdd)
value_map = {1.0: 0.0}
result = raster_rdd.reclassify(value_map, float, replace_nodata_with=1.0).to_numpy_rdd().first()[1].cells
self.assertTrue((result == np.identity(4)).all())
def _update_syn(self, x, eta=0.5):
"""Perform one update of the weights and re-calculate moments in the SYNERGISTIC case."""
m = self.moments
H = (1. / m["X_i^2 | Y"] * m["X_i Z_j"].T).dot(m["X_i Z_j"])
np.fill_diagonal(H, 0)
R = m["X_i Z_j"].T / m["X_i^2 | Y"]
S = np.dot(H, self.ws)
ws = (1. - eta) * self.ws + eta * (R - S)
m = self._calculate_moments_syn(x, ws)
return ws, m
def get_covariance(self):
# This uses E(Xi|Y) formula for non-synergistic relationships
m = self.moments
if self.discourage_overlap:
z = m['rhoinvrho'] / (1 + m['Si'])
cov = np.dot(z.T, z)
cov /= (1. - self.eps**2)
np.fill_diagonal(cov, 1)
return self.theta[1][:, np.newaxis] * self.theta[1] * cov
else:
cov = np.einsum('ij,kj->ik', m["X_i Z_j"], m["X_i Y_j"])
np.fill_diagonal(cov, 1)
return self.theta[1][:, np.newaxis] * self.theta[1] * cov
def seriation(self, A):
n_components = 2
eigen_tol = 0.00001
if sparse.issparse(A):
A = A.todense()
np.fill_diagonal(A, 0)
laplacian, dd = graph_laplacian(A, return_diag=True)
laplacian *= -1
lambdas, diffusion_map = eigsh(laplacian, k=n_components, sigma=1.0, which='LM', tol=eigen_tol)
embedding = diffusion_map.T[n_components::-1] # * dd
sort_index = np.argsort(embedding[1])
return sort_index
def fill_diagonal(a, val, wrap=False):
"""Fills the main diagonal of the given array of any dimensionality.
For an array `a` with ``a.ndim > 2``, the diagonal is the list of
locations with indices ``a[i, i, ..., i]`` all identical. This function
modifies the input array in-place, it does not return a value.
Args:
a (cupy.ndarray): The array, at least 2-D.
val (scalar): The value to be written on the diagonal.
Its type must be compatible with that of the array a.
wrap (bool): If specified, the diagonal is "wrapped" after N columns.
This affects only tall matrices.
Examples
--------
>>> a = cupy.zeros((3, 3), int)
>>> cupy.fill_diagonal(a, 5)
>>> a
array([[5, 0, 0],
[0, 5, 0],
[0, 0, 5]])
.. seealso:: :func:`numpy.fill_diagonal`
"""
# The followings are imported from the original numpy
if a.ndim < 2:
raise ValueError('array must be at least 2-d')
end = None
if a.ndim == 2:
step = a.shape[1] + 1
if not wrap:
end = a.shape[1] * a.shape[1]
else:
if not numpy.alltrue(numpy.diff(a.shape) == 0):
raise ValueError('All dimensions of input must be of equal length')
step = 1 + numpy.cumprod(a.shape[:-1]).sum()
# Since the current cupy does not support a.flat,
# we use a.ravel() instead of a.flat
a.ravel()[:end:step] = val
def knn_initialize(
X,
missing_mask,
verbose=False,
min_dist=1e-6,
max_dist_multiplier=1e6):
"""
Fill X with NaN values if necessary, construct the n_samples x n_samples
distance matrix and set the self-distance of each row to infinity.
Returns contents of X laid out in row-major, the distance matrix,
and an "effective infinity" which is larger than any entry of the
distance matrix.
"""
X_row_major = X.copy("C")
if missing_mask.sum() != np.isnan(X_row_major).sum():
# if the missing values have already been zero-filled need
# to put NaN's back in the data matrix for the distances function
X_row_major[missing_mask] = np.nan
D = all_pairs_normalized_distances(X_row_major)
D_finite_flat = D[np.isfinite(D)]
if len(D_finite_flat) > 0:
max_dist = max_dist_multiplier * max(1, D_finite_flat.max())
else:
max_dist = max_dist_multiplier
# set diagonal of distance matrix to a large value since we don't want
# points considering themselves as neighbors
np.fill_diagonal(D, max_dist)
D[D < min_dist] = min_dist # prevents 0s
D[D > max_dist] = max_dist # prevents infinities
return X_row_major, D, max_dist
def getDiffusionMap(SSM, Kappa, t = -1, includeDiag = True, thresh = 5e-4, NEigs = 51):
"""
:param SSM: Metric between all pairs of points
:param Kappa: Number in (0, 1) indicating a fraction of nearest neighbors
used to autotune neighborhood size
:param t: Diffusion parameter. If -1, do Autotuning
:param includeDiag: If true, include recurrence to diagonal in the markov
chain. If false, zero out diagonal
:param thresh: Threshold below which to zero out entries in markov chain in
the sparse approximation
:param NEigs: The number of eigenvectors to use in the approximation
"""
N = SSM.shape[0]
#Use the letters from the delaPorte paper
K = getW(SSM, int(Kappa*N))
if not includeDiag:
np.fill_diagonal(K, np.zeros(N))
RowSumSqrt = np.sqrt(np.sum(K, 1))
DInvSqrt = sparse.diags([1/RowSumSqrt], [0])
#Symmetric normalized Laplacian
Pp = (K/RowSumSqrt[None, :])/RowSumSqrt[:, None]
Pp[Pp < thresh] = 0
Pp = sparse.csr_matrix(Pp)
lam, X = sparse.linalg.eigsh(Pp, NEigs, which='LM')
lam = lam/lam[-1] #In case of numerical instability
#Check to see if autotuning
if t > -1:
lamt = lam**t
else:
#Autotuning diffusion time
lamt = np.array(lam)
lamt[0:-1] = lam[0:-1]/(1-lam[0:-1])
#Do eigenvector version
V = DInvSqrt.dot(X) #Right eigenvectors
M = V*lamt[None, :]
return M/RowSumSqrt[:, None] #Put back into orthogonal Euclidean coordinates
def coulomb_matrix(self, system):
"""Creates the Coulomb matrix for the given system.
"""
# Calculate offdiagonals
q = system.get_initial_charges()
qiqj = q[None, :]*q[:, None]
idmat = system.get_inverse_distance_matrix()
np.fill_diagonal(idmat, 0)
cmat = qiqj*idmat
# Set diagonal
np.fill_diagonal(cmat, 0.5 * q ** 2.4)
return cmat
def sine_matrix(self, system):
"""Creates the Sine matrix for the given system.
"""
# Cell and inverse cell
B = system.get_cell()
B_inv = system.get_cell_inverse()
# Difference vectors in tensor 3D-tensor-form
diff_tensor = system.get_displacement_tensor()
# Calculate phi
arg_to_sin = np.pi * np.dot(diff_tensor, B_inv)
phi = np.linalg.norm(np.dot(np.sin(arg_to_sin)**2, B), axis=2)
with np.errstate(divide='ignore'):
phi = np.reciprocal(phi)
# Calculate Z_i*Z_j
q = system.get_initial_charges()
qiqj = q[None, :]*q[:, None]
np.fill_diagonal(phi, 0)
# Multiply by charges
smat = qiqj*phi
# Set diagonal
np.fill_diagonal(smat, 0.5 * q ** 2.4)
return smat
def _calc_subsystem_energies(self, ewald_matrix):
"""Modify the give matrix that consists of the eral and reciprocal sums
so that each entry x_ij is the full Ewald sum energy of a system
consisting of atoms i and j.
"""
q = self.q
# Create the self-term array where q1[i,j] is qi**2 + qj**2, except for
# the diagonal, where it is qi**2
q1 = q[None, :]**2 + q[:, None]**2
diag = np.diag(q1)/2
np.fill_diagonal(q1, diag)
q1_prefactor = -self.a/self.sqrt_pi
# Create the charge correction array where q2[i,j] is (qi + qj)**2,
# except for the diagonal where it is qi**2
q2 = q[None, :] + q[:, None]
q2 **= 2
diag = np.diag(q2)/4
np.fill_diagonal(q2, diag)
q2_prefactor = -np.pi/(2*self.volume*self.a_squared)
correction_matrix = q1_prefactor*q1 + q2_prefactor*q2
# Add the terms coming from x_ii and x_jj to the off-diagonal along
# with the corrections
n_atoms = self.n_atoms
final_matrix = np.zeros((n_atoms, n_atoms))
for i in range(n_atoms):
for j in range(n_atoms):
if i == j:
final_matrix[i, j] = ewald_matrix[i, j]
else:
pair_term = 2*ewald_matrix[i, j]
self_term_ii = ewald_matrix[i, i]
self_term_jj = ewald_matrix[j, j]
energy_total = pair_term + self_term_ii + self_term_jj
final_matrix[i, j] = energy_total
final_matrix += correction_matrix
return final_matrix
transformation_tests_func.py 文件源码
项目:3D_Dense_Transformer_Networks
作者: JohnYC1995
项目源码
文件源码
阅读 38
收藏 0
点赞 0
评论 0
def makeT(self,cp):
# cp: [(k*k*k) x 3] control points
# T: [((k*k*k)+4) x ((k*k*k)+4)]
K = cp.shape[0]
T = np.zeros((K+4, K+4))
T[:K, 0] = 1; T[:K, 1:4] = cp; T[K, 4:] = 1; T[K+1:, 4:] = cp.T
R = squareform(pdist(cp, metric='euclidean'))
R = R * R;R[R == 0] = 1 # a trick to make R ln(R) 0
R = R * np.log(R)
np.fill_diagonal(R, 0)
T[:K, 4:] = R
return T