def svgd_kernel(self, h=-1):
    sq_dist = pdist(self.theta)
    pairwise_dists = squareform(sq_dist) ** 2
    if h < 0:  # if h < 0, use the median trick to set the bandwidth
        h = np.median(pairwise_dists)
        h = np.sqrt(0.5 * h / np.log(self.theta.shape[0] + 1))
    # compute the RBF kernel
    Kxy = np.exp(-pairwise_dists / h ** 2 / 2)
    dxkxy = -np.matmul(Kxy, self.theta)
    sumkxy = np.sum(Kxy, axis=1)
    for i in range(self.theta.shape[1]):
        dxkxy[:, i] = dxkxy[:, i] + np.multiply(self.theta[:, i], sumkxy)
    dxkxy = dxkxy / (h ** 2)
    return (Kxy, dxkxy)
Python pdist() example source code
def test_cuttreeHybrid():
    from dynamicTreeCut import cutreeHybrid
    d = np.transpose(np.arange(1, 10001).reshape(100, 100))
    distances = pdist(d, "euclidean")
    link = linkage(distances, "average")
    test = cutreeHybrid(link, distances)
    # expected labels: 32 samples in cluster 2, 32 in cluster 3, 36 in cluster 1
    true = [2] * 32 + [3] * 32 + [1] * 36
    assert (test['labels'] == true).all()
def svgd_kernel(self, theta, h=-1):
    sq_dist = pdist(theta)
    pairwise_dists = squareform(sq_dist) ** 2
    if h < 0:  # if h < 0, use the median trick to set the bandwidth
        h = np.median(pairwise_dists)
        h = np.sqrt(0.5 * h / np.log(theta.shape[0] + 1))
    # compute the RBF kernel
    Kxy = np.exp(-pairwise_dists / h ** 2 / 2)
    dxkxy = -np.matmul(Kxy, theta)
    sumkxy = np.sum(Kxy, axis=1)
    for i in range(theta.shape[1]):
        dxkxy[:, i] = dxkxy[:, i] + np.multiply(theta[:, i], sumkxy)
    dxkxy = dxkxy / (h ** 2)
    return (Kxy, dxkxy)
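For reference, a minimal standalone sketch of the median-trick bandwidth and RBF kernel that both versions build (the particle array is invented for illustration):

import numpy as np
from scipy.spatial.distance import pdist, squareform

theta = np.random.randn(50, 2)                  # 50 particles in 2-D
pairwise_dists = squareform(pdist(theta)) ** 2
h = np.sqrt(0.5 * np.median(pairwise_dists) / np.log(theta.shape[0] + 1))  # median trick
Kxy = np.exp(-pairwise_dists / h ** 2 / 2)      # same kernel the methods above build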
def tsne_cluster_cuisine(df, sublist):
    lenlist = [0]
    df_sub = df[df['cuisine'] == sublist[0]]
    lenlist.append(df_sub.shape[0])
    for cuisine in sublist[1:]:
        temp = df[df['cuisine'] == cuisine]
        df_sub = pd.concat([df_sub, temp], axis=0, ignore_index=True)
        lenlist.append(df_sub.shape[0])
    df_X = df_sub.drop(['cuisine', 'recipeName'], axis=1)
    print(df_X.shape, lenlist)
    dist = squareform(pdist(df_X, metric='cosine'))
    tsne = TSNE(metric='precomputed').fit_transform(dist)
    palette = sns.color_palette("hls", len(sublist))
    plt.figure(figsize=(10, 10))
    for i, cuisine in enumerate(sublist):
        plt.scatter(tsne[lenlist[i]:lenlist[i + 1], 0],
                    tsne[lenlist[i]:lenlist[i + 1], 1],
                    c=palette[i], label=sublist[i])
    plt.legend()
# interactive plot with bokeh; set up for four categories, with color palette; pass in df for either ingredient or flavor
def get_close_markers(markers, centroids=None, min_distance=20):
    if centroids is None:
        centroids = [m['centroid'] for m in markers]
    centroids = np.array(centroids)
    ti = np.triu_indices(centroids.shape[0], 1)

    def full_idx(i):
        # get the pair from a condensed-matrix index
        # defined inline because ti changes every call
        return np.array([ti[0][i], ti[1][i]])

    # calculate pairwise distances; pdist returns the condensed distance
    # matrix (the flattened upper triangle)
    distances = pdist(centroids, 'euclidean')
    close_pairs = np.where(distances < min_distance)
    return full_idx(close_pairs)
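The full_idx trick works because np.triu_indices enumerates pairs in exactly the order pdist stores them; a small sketch with four points:

import numpy as np
from scipy.spatial.distance import pdist

pts = np.array([[0, 0], [0, 1], [5, 5], [0, 2]])
ti = np.triu_indices(4, 1)                   # pairs: (0,1),(0,2),(0,3),(1,2),(1,3),(2,3)
d = pdist(pts)                               # distances in the same pair order
idx = np.where(d < 2)[0]                     # condensed indices of close pairs
pairs = np.array([ti[0][idx], ti[1][idx]])   # recover the (i, j) point indices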
def _compute_dispersion_matrix(X, labels):
    n = len(np.unique(labels))
    dist = np.zeros((n, n))
    ITR = list(itertools.combinations_with_replacement(range(n), 2))
    for i, j in tqdm(ITR):
        if i == j:
            # pdist already returns only the condensed upper triangle
            d = pdist(X[labels == i], metric='cosine')
        else:
            d = cdist(X[labels == i], X[labels == j], metric='cosine')
            # Only take the upper triangle (including the diagonal)
            d = d[np.triu_indices(n=d.shape[0], m=d.shape[1], k=0)]
        dist[i, j] = dist[j, i] = d.mean()
    return dist
def construct_data_synthetic_Laplacian(D, lifetime, noise_var, N_train, N_test):
    # pick datapoint locations uniformly at random
    N = N_train + N_test
    X = np.random.rand(N, D)
    # construct the kernel matrix (Laplacian kernel on L1 distances)
    K = np.exp(-lifetime * squareform(pdist(X, 'cityblock')))
    # sample the function at the picked locations x
    y = np.linalg.cholesky(K).dot(np.random.randn(N)) + np.sqrt(noise_var) * np.random.randn(N)
    # pick training indices sequentially
    indices_train = range(0, N_train)
    indices_test = range(N_train, N)
    # split the data into train and test
    X_train = X[indices_train]
    X_test = X[indices_test]
    y_train = y[indices_train]
    y_test = y[indices_test]
    return X_train, y_train, X_test, y_test
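An illustrative call (argument values invented, not from the original project):

# assumes numpy and scipy.spatial.distance are imported as above
X_tr, y_tr, X_te, y_te = construct_data_synthetic_Laplacian(
    D=2, lifetime=1.0, noise_var=0.01, N_train=80, N_test=20)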
# SAMPLING
def calculate_position_error_at_z(self, z=0):
    '''
    Returns the standard deviation in x and y, and the mean Euclidean
    distance between pairs of coordinates.
    '''
    xy_at_given_z = []
    for ax in self.axes:
        x, y = ax.getXY(z=z)
        xy_at_given_z.append((x, y))
    X = [xy[0] for xy in xy_at_given_z]
    Y = [xy[1] for xy in xy_at_given_z]
    pairs = []
    for x, y in zip(X, Y):
        pairs.append((x, y))
    distances = distance.pdist(pairs)
    return ((np.std(X), np.std(Y)), np.mean(distances))
def distance(self, x, y):
    """
    Computes the squared Euclidean distance between vectors x and y. Returns float.
    """
    d = x - y
    # dist = numpy.ma.inner(d, d)
    dist = numpy.sum(d ** 2)
    # dist = pdist([x, y], 'sqeuclidean')
    # n = len(x)
    # code = \
    #     """
    #     int i;
    #     double sum = 0.0, delta = 0.0f;
    #     for (i = 0; i < n; i++) {
    #         delta = (x[i] - y[i]);
    #         sum += delta * delta;
    #     }
    #     return_val = sum;
    #     """
    # dist = weave.inline(code, ['x', 'y', 'n'])
    return dist
def compute_dcov_dcorr_statistics(y, alpha):
    """ Compute the statistics for distance covariance/correlation.

    Parameters
    ----------
    y : (number of samples, dimension)-ndarray
        One row of y corresponds to one sample.
    alpha : float
        0 < alpha < 2.

    Returns
    -------
    c : (number of samples, number of samples)-ndarray
        Computed statistics (the double-centred pairwise distance matrix).
    """
    d = squareform(pdist(y)) ** alpha
    ck = mean(d, axis=0)
    c = d - ck - ck[:, newaxis] + mean(ck)
    return c
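With alpha = 1 this is the double-centred distance matrix used in the distance-covariance V-statistic; a sketch of one way it might be combined (the second variable y below is an assumption for illustration, not part of the original snippet):

import numpy as np
from numpy import mean, newaxis
from scipy.spatial.distance import pdist, squareform

x = np.random.randn(200, 3)
y = x[:, :1] + 0.1 * np.random.randn(200, 1)   # dependent second variable
cx = compute_dcov_dcorr_statistics(x, alpha=1.0)
cy = compute_dcov_dcorr_statistics(y, alpha=1.0)
dcov2 = (cx * cy).mean()   # V-statistic estimate of the squared distance covariance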
def plot_hamming_dist(s, W, brec):
    masks = s[:, 0, :].T > 0
    x_hat = np.zeros(masks.shape)
    for ii in range(masks.shape[1]):
        Weff = W * masks[:, ii]
        x_hat[:, ii] = np.linalg.inv(np.eye(100) - Weff).dot(brec)
    fig = plt.figure()
    plt.pcolormesh(squareform(pdist(np.sign(x_hat[:, :]).T, metric='hamming')))  # ,vmax=.3)
    plt.colorbar()
    plt.ylim([0, x_hat.shape[1]])
    plt.xlim([0, x_hat.shape[1]])
    plt.gca().set_aspect('equal')
    plt.title('Hamming Distance Between Putative FPs')
    plt.ylabel('Time')
    plt.xlabel('Time')
    return fig
def test_mean_of_distances(self):
    """Test the mean of distances calculation (and the sum)."""
    X = np.array([[0.3, 0.4],
                  [0.1, 4.0],
                  [2.0, 1.0],
                  [0.0, 0.5]])
    counts = np.array([3, 2, 1, 2])
    scipy_X = []
    for c, count in enumerate(counts):
        for i in range(count):
            scipy_X.append(X[c])
    # SciPy reference:
    Y = pdist(scipy_X, metric='euclidean')
    scipy_N = np.sum(counts)
    N_unique_pairs = scipy_N * (scipy_N - 1) // 2
    scipy_mean = Y.mean()
    self.assertTrue(Y.shape[0] == N_unique_pairs)
    self.assertTrue(scipy_mean == (np.sum(Y) / N_unique_pairs))
    # C & Cython implementation:
    c_mean = c_mean_dist(X, counts)
    self.assertTrue(np.isclose(c_mean, scipy_mean))
def kernel(self, X, Y=None):
    GenericTests.check_type(X, 'X', np.ndarray, 2)
    # if X == Y, use the more efficient pdist call which exploits symmetry
    if Y is None:
        dists = squareform(pdist(X, 'euclidean'))
    else:
        GenericTests.check_type(Y, 'Y', np.ndarray, 2)
        assert(shape(X)[1] == shape(Y)[1])
        dists = cdist(X, Y, 'euclidean')
    if self.nu == 0.5:
        # for nu = 1/2, the Matern class corresponds to the Ornstein-Uhlenbeck process
        K = (self.sigma**2.) * exp(-dists / self.width)
    elif self.nu == 1.5:
        K = (self.sigma**2.) * (1 + sqrt(3.) * dists / self.width) * exp(-sqrt(3.) * dists / self.width)
    elif self.nu == 2.5:
        K = (self.sigma**2.) * (1 + sqrt(5.) * dists / self.width + 5.0 * (dists**2.) / (3.0 * self.width**2.)) * exp(-sqrt(5.) * dists / self.width)
    else:
        raise NotImplementedError()
    return K
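For reference, the nu = 1.5 branch evaluates k(r) = sigma**2 * (1 + sqrt(3)*r/width) * exp(-sqrt(3)*r/width) on the pairwise distances; a standalone sketch with arbitrary sigma and width:

import numpy as np
from scipy.spatial.distance import pdist, squareform

sigma, width = 1.0, 2.0
X = np.random.randn(10, 3)
r = squareform(pdist(X, 'euclidean'))
K = sigma**2 * (1 + np.sqrt(3) * r / width) * np.exp(-np.sqrt(3) * r / width)
assert np.allclose(np.diag(K), sigma**2)   # k(0) = sigma^2 on the diagonal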
def _compute_J(x, window_starts, L):
    """Compute the cost, which is proportional to the
    difference between pairs of windows"""
    # Get all windows and z-score them
    N_windows = len(window_starts)
    windows = np.zeros((N_windows, L))
    for w in range(N_windows):
        temp = x[window_starts[w]:window_starts[w] + L]
        windows[w] = (temp - np.mean(temp)) / np.std(temp)
    # Calculate distances for all pairs of windows
    dist = pdist(np.vstack(windows),
                 lambda u, v: np.sum((u - v) ** 2))
    J = np.sum(dist) / float(L * (N_windows - 1))
    return J
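A hedged usage sketch (signal and window grid invented for illustration):

import numpy as np
from scipy.spatial.distance import pdist

x = np.sin(np.linspace(0, 20 * np.pi, 1000)) + 0.1 * np.random.randn(1000)
J = _compute_J(x, window_starts=np.arange(0, 900, 100), L=100)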
dataset.py (project: neural-combinatorial-optimization-rl-tensorflow, author: MichelDeudon)
def k_nearest_neighbor(self, sequence):
    # Calculate the distance matrix
    dist_array = pdist(sequence)
    dist_matrix = squareform(dist_array)
    # Construct the tour greedily, picking at random among the k nearest unvisited cities
    new_sequence = [sequence[0]]
    current_city = 0
    visited_cities = [0]
    for i in range(1, len(sequence)):
        j = np.random.randint(0, min(len(sequence) - i, self.kNN))
        next_city = [index for index in dist_matrix[current_city].argsort()
                     if index not in visited_cities][j]
        visited_cities.append(next_city)
        new_sequence.append(sequence[next_city])
        current_city = next_city
    return np.asarray(new_sequence)
# Generate random TSP-TW instance
def kmeans_classify(A, means, metric):
    # set up the lists to return
    data_classes = []
    data_metrics = []
    means_list = means.tolist()
    for v in A:  # for every data vector
        # start the distance at the largest value possible
        dist = float('inf')
        index = 0
        for i in range(len(means_list)):
            m = means_list[i]
            norm_matrix = np.vstack((v, m))
            d = norms.pdist(norm_matrix, metric)[0]
            if d < dist:
                dist = d
                index = i
        data_classes.append([index])
        data_metrics.append([dist])
    return np.matrix(data_classes), np.matrix(data_metrics)
def create_3D_distance_matrix(vox_ijk, epi_fname):
    """Compute distances between voxels in the volume.

    Parameters
    ----------
    vox_ijk : n x 3 array
        Indices of voxels included in the ROI.
    epi_fname : file path
        Path to image defining the volume space.

    Returns
    -------
    dmat : array
        Dense square distance matrix.

    """
    aff = nib.load(epi_fname).affine
    vox_ras = nib.affines.apply_affine(aff, vox_ijk)
    dmat = squareform(pdist(vox_ras))
    return dmat
def PQTrain(data, lenSubVec, numSubCenter):
    (dataSize, dataDim) = data.shape
    if dataDim % lenSubVec != 0:
        print("Cannot partition the feature space with the given segment number")
        return
    numSubVec = dataDim // lenSubVec
    centers = npy.zeros((numSubVec * numSubCenter, lenSubVec), dtype=npy.float32)
    distOfCenters = npy.zeros((numSubCenter, numSubCenter, numSubVec), dtype=npy.float32)
    objKmeans = KMeans(n_clusters=numSubCenter, init='k-means++', n_init=3,
                       max_iter=100, tol=0.001)
    for ii in range(numSubVec):
        print("PQ training. Processing " + str(ii) + "-th sub-vector")
        objKmeans.fit(data[:, ii * lenSubVec:(ii + 1) * lenSubVec])
        centers[ii * numSubCenter:(ii + 1) * numSubCenter, :] = objKmeans.cluster_centers_
        distOfCenters[:, :, ii] = squareform(pdist(objKmeans.cluster_centers_, metric="euclidean"))
    model = {"centers": centers, "distOfCenters": distOfCenters}
    return model
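A hedged usage sketch (random data; the 8/16 product-quantization configuration is arbitrary):

import numpy as npy
from sklearn.cluster import KMeans
from scipy.spatial.distance import pdist, squareform

data = npy.random.randn(1000, 32).astype(npy.float32)
model = PQTrain(data, lenSubVec=8, numSubCenter=16)   # 4 sub-vectors of length 8
print(model["centers"].shape)                         # (64, 8): 16 centers per sub-vector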
def _compute_centers(self, X, sparse, rs):
    """Generate centers, then compute tau, dF and dN vals"""
    super(GRBFRandomLayer, self)._compute_centers(X, sparse, rs)
    centers = self.components_['centers']
    sorted_distances = np.sort(squareform(pdist(centers)))
    self.dF_vals = sorted_distances[:, -1]
    self.dN_vals = sorted_distances[:, 1] / 100.0
    # self.dN_vals = 0.0002 * np.ones(self.dF_vals.shape)
    tauNum = np.log(np.log(self.grbf_lambda) /
                    np.log(1.0 - self.grbf_lambda))
    tauDenom = np.log(self.dF_vals / self.dN_vals)
    self.tau_vals = tauNum / tauDenom
    self._extra_args['taus'] = self.tau_vals

# get radii according to ref [1]
def kernel_matrix(svm_model, original_X):
    if (svm_model.svm_kernel == 'polynomial_kernel' or svm_model.svm_kernel == 'soft_polynomial_kernel'):
        K = (svm_model.zeta + svm_model.gamma * np.dot(original_X, original_X.T)) ** svm_model.Q
    elif (svm_model.svm_kernel == 'gaussian_kernel' or svm_model.svm_kernel == 'soft_gaussian_kernel'):
        pairwise_dists = squareform(pdist(original_X, 'euclidean'))
        K = np.exp(-svm_model.gamma * (pairwise_dists ** 2))
    '''
    Equivalent (but much slower) element-wise version:
    K = np.zeros((svm_model.data_num, svm_model.data_num))
    for i in range(svm_model.data_num):
        for j in range(svm_model.data_num):
            if (svm_model.svm_kernel == 'polynomial_kernel' or svm_model.svm_kernel == 'soft_polynomial_kernel'):
                K[i, j] = Kernel.polynomial_kernel(svm_model, original_X[i], original_X[j])
            elif (svm_model.svm_kernel == 'gaussian_kernel' or svm_model.svm_kernel == 'soft_gaussian_kernel'):
                K[i, j] = Kernel.gaussian_kernel(svm_model, original_X[i], original_X[j])
    '''
    return K
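A quick self-check that the vectorised Gaussian branch matches the element-wise definition (the names gamma and X are illustrative):

import numpy as np
from scipy.spatial.distance import pdist, squareform

gamma, X = 0.5, np.random.randn(20, 4)
K_fast = np.exp(-gamma * squareform(pdist(X, 'euclidean')) ** 2)
i, j = 3, 7
assert np.isclose(K_fast[i, j], np.exp(-gamma * np.sum((X[i] - X[j]) ** 2)))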
samediff.py (project: Multi-view-neural-acoustic-words-embeddings, author: opheadacheh)
def generate_matches_array(labels):
    """
    Return an array of bool in the same order as the distances from
    `scipy.spatial.distance.pdist` indicating whether a distance is for
    matching or non-matching labels.
    """
    N = len(labels)
    matches = np.zeros(N * (N - 1) // 2, dtype=bool)
    # For every distance, mark whether it is a true match or not
    cur_matches_i = 0
    for n in range(N):
        cur_label = labels[n]
        matches[cur_matches_i:cur_matches_i + (N - n) - 1] = np.asarray(labels[n + 1:]) == cur_label
        cur_matches_i += N - n - 1
    return matches
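For example, three labels yield three pdist-ordered pairs (numpy assumed imported as np):

labels = ["cat", "dog", "cat"]
# pdist pair order: (0,1), (0,2), (1,2)
print(generate_matches_array(labels))   # [False  True False]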
def check_argv():
    """Check the command line arguments."""
    parser = argparse.ArgumentParser(description=__doc__.strip().split("\n")[0], add_help=False)
    parser.add_argument("labels_fn", help="file of labels")
    parser.add_argument(
        "distances_fn",
        help="file providing the distances between each pair of labels in the same order as "
        "`scipy.spatial.distance.pdist`"
    )
    parser.add_argument(
        "--binary_dists", dest="binary_dists", action="store_true",
        help="distances are given in float32 binary format "
        "(default is to assume distances are given in text format)"
    )
    parser.set_defaults(binary_dists=False)
    if len(sys.argv) == 1:
        parser.print_help()
        sys.exit(1)
    return parser.parse_args()
# -----------------------------------------------------------------------------#
# MAIN FUNCTION #
# -----------------------------------------------------------------------------#
def plot_clusters_igraph(responsibilities, color_groups):
    from scipy.spatial.distance import pdist, squareform
    from igraph import Graph, plot
    data = responsibilities[:, :2]
    Y = pdist(data, hellinger_distance)
    print(Y[:30], file=stderr)
    # return
    g = Graph()
    n = data.shape[0]
    g.add_vertices(n)
    colors = ["grey"] * n
    palette = list(colors_dict.values())
    for j, group in enumerate(color_groups):
        c = palette[j]
        for i in group:
            colors[i] = c
    l = g.layout_mds(dist=squareform(Y))
    plot(g, layout=l, vertex_color=colors, bbox=(1024, 1024), vertex_size=5)
# c&p from stackexchange
def get_cell_data(n=50, seed=0):
    np.random.seed(seed)
    cells_data = np.load('./data/cells_data.npy')
    sample_cells = np.random.choice(cells_data.shape[0], n, replace=False)
    D = pdist(cells_data[sample_cells, :], 'euclidean')
    Z = linkage(D, 'ward')
    return cells_data, Z, D
def get_random_data(n=50, seed=0):
    np.random.seed(seed)
    data = np.random.choice(10000, (n, 1), replace=False)
    D = pdist(data, 'euclidean')
    Z = linkage(D, 'ward')
    return data, Z, D
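The returned linkage matrix can go straight into SciPy's dendrogram plotting; a minimal sketch:

import numpy as np
import matplotlib.pyplot as plt
from scipy.spatial.distance import pdist
from scipy.cluster.hierarchy import linkage, dendrogram

data, Z, D = get_random_data(n=30, seed=1)
dendrogram(Z)
plt.show()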
def kernel_matrix(self, X):
    # sanity check: expect more samples than dimensions
    assert X.shape[0] > X.shape[1]
    sq_dists = squareform(pdist(X, 'sqeuclidean'))
    K = np.exp(-sq_dists / self.scaling)
    return K
def k_multiple(self, X):
    """
    Efficient computation of the kernel matrix without loops.
    Effectively does the same as calling self.k on all pairs of the input.
    """
    assert(X.ndim == 1)
    sq_dists = squareform(pdist(X.reshape(len(X), 1), 'sqeuclidean'))
    K = np.exp(-sq_dists / self.scaling)
    return K
def k_multiple_dim(self, X):
    # sanity check: expect more samples than dimensions
    assert X.shape[0] > X.shape[1]
    sq_dists = squareform(pdist(X, 'sqeuclidean'))
    K = np.exp(-sq_dists / self.scaling)
    return K