def get_mfcc_feat(self):
    # Build a codebook from the MFCC features of all speaker recordings.
    mfcc_feats = None
    for filename in glob.iglob('../data/voices/*.wav'):
        print(filename)
        (rate, sig) = wav.read(filename)
        # MFCC features: each row holds the coefficients for one frame.
        mfcc_person = mfcc(sig.astype(np.float64), rate)
        if mfcc_feats is None:
            mfcc_feats = mfcc_person
        else:
            mfcc_feats = np.concatenate((mfcc_feats, mfcc_person), axis=0)
    # Normalize the features to unit variance per dimension.
    whitened = whiten(mfcc_feats)
    self.codebook, labeled_obs = kmeans2(data=whitened, k=3)
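# A hedged usage sketch (not from the original project): once the codebook is
# trained, scipy.cluster.vq.vq assigns each new whitened MFCC frame to its
# nearest codeword. The helper name `quantize_utterance`, the
# `python_speech_features` import, and re-whitening the new utterance by its
# own statistics are all illustrative assumptions.
import numpy as np
import scipy.io.wavfile as wav
from scipy.cluster.vq import vq, whiten
from python_speech_features import mfcc  # assumed source of mfcc()

def quantize_utterance(codebook, wav_path):
    rate, sig = wav.read(wav_path)
    feats = whiten(mfcc(sig.astype(np.float64), rate))
    # vq returns the nearest-codeword index and the distortion for each frame.
    codes, distortion = vq(feats, codebook)
    return codes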
def _calculate_gap(self, X: Union[pd.DataFrame, np.ndarray], n_refs: int, n_clusters: int) -> Tuple[float, int]:
    """
    Calculate the gap value of the given data, n_refs, and number of clusters.
    Return the resulting gap value and n_clusters.
    """
    # Holder for reference dispersion results
    ref_dispersions = np.zeros(n_refs)  # type: np.ndarray
    # For each of the n_refs references, generate a random sample, run
    # kmeans on it, and record the resulting dispersion.
    for i in range(n_refs):
        # Create a new random reference set
        random_data = np.random.random_sample(size=X.shape)  # type: np.ndarray
        # Fit to it, getting the centroids and labels, and add to the
        # accumulated reference dispersions array.
        centroids, labels = kmeans2(data=random_data,
                                    k=n_clusters,
                                    iter=10,
                                    minit='points')  # type: Tuple[np.ndarray, np.ndarray]
        dispersion = self._calculate_dispersion(X=random_data, labels=labels, centroids=centroids)  # type: float
        ref_dispersions[i] = dispersion
    # Fit clusters to the original data and compute its dispersion.
    centroids, labels = kmeans2(data=X, k=n_clusters, iter=10, minit='points')
    dispersion = self._calculate_dispersion(X=X, labels=labels, centroids=centroids)
    # Calculate the gap statistic
    gap_value = np.log(np.mean(ref_dispersions)) - np.log(dispersion)
    return gap_value, int(n_clusters)
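# `_calculate_dispersion` is not shown in this snippet. A minimal sketch under
# the standard gap-statistic definition (Tibshirani et al.): the within-cluster
# sum of squared distances from each point to its assigned centroid. The helper
# body and the selection loop below are assumptions, not the original code.
import numpy as np

def _calculate_dispersion(X, labels, centroids):
    # W_k: total squared distance from each point to its cluster centre.
    return float(np.sum((np.asarray(X) - centroids[labels]) ** 2))

# Hypothetical selection loop: evaluate a range of k and keep the one with
# the largest gap value.
# gaps = [gap._calculate_gap(X, n_refs=10, n_clusters=k) for k in range(1, 11)]
# best_gap, best_k = max(gaps)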
def kmeans(xs, k):
    assert xs.ndim == 2
    try:
        from sklearn.cluster import k_means
        _, labels, _ = k_means(xs.astype("float64"), k)
    except ImportError:
        from scipy.cluster.vq import kmeans2
        _, labels = kmeans2(xs, k, missing='raise')
    return labels
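# A quick usage sketch on synthetic data (the shapes are illustrative):
import numpy as np

xs = np.random.rand(200, 2)   # 200 points in 2-D
labels = kmeans(xs, 3)        # cluster index in {0, 1, 2} per point
assert labels.shape == (200,)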
def init_hypers(self, y_train):
    """Initialise the hyperparameters: inducing inputs (via k-means or
    random subsampling), log-lengthscales (via the median distance
    heuristic), signal variance and noise variance.

    Args:
        y_train (ndarray): Training targets (kept for the interface).

    Returns:
        dict: Initial parameter values.
    """
    N = self.N
    M = self.M
    Din = self.Din
    Dout = self.Dout
    x_train = self.x_train
    if N < 10000:
        # Small dataset: place the M inducing inputs at k-means centroids.
        centroids, label = kmeans2(x_train, M, minit='points')
    else:
        # Large dataset: fall back to a random subset of the inputs.
        randind = np.random.permutation(N)
        centroids = x_train[randind[0:M], :]
    zu = centroids
    if N < 10000:
        X1 = np.copy(x_train)
    else:
        randind = np.random.permutation(N)
        X1 = x_train[randind[:5000], :]
    x_dist = cdist(X1, X1, 'euclidean')
    triu_ind = np.triu_indices(X1.shape[0])
    ls = np.zeros((Din, ))
    d2imed = np.median(x_dist[triu_ind])
    for i in range(Din):
        ls[i] = 2 * np.log(d2imed + 1e-16)
    sf = np.log(np.array([0.5]))
    params = dict()
    params['sf'] = sf
    params['ls'] = ls
    params['zu'] = zu
    params['sn'] = np.log(0.01)
    return params
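# The lengthscale initialisation above is the median distance heuristic: set
# each log-lengthscale from the median pairwise Euclidean distance of (a
# subsample of) the inputs. A standalone sketch, with the 5000-point subsample
# cap carried over from the code above; the function name is an assumption.
import numpy as np
from scipy.spatial.distance import cdist

def median_heuristic_log_lengthscale(x, max_points=5000):
    if x.shape[0] > max_points:
        x = x[np.random.permutation(x.shape[0])[:max_points], :]
    d = cdist(x, x, 'euclidean')
    med = np.median(d[np.triu_indices(x.shape[0])])
    return 2 * np.log(med + 1e-16)  # matches ls[i] above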
def Kmeans(data, num_K):
    # Thin wrapper around scipy's kmeans2; missing='warn' (the default) emits
    # a warning instead of raising when a cluster ends up empty.
    centroid, label = kmeans2(
        data=data, k=num_K, iter=100, minit='points', missing='warn')
    return centroid, label
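# Hedged usage sketch on synthetic data (shapes are illustrative):
import numpy as np

data = np.random.randn(500, 4)
centroid, label = Kmeans(data, num_K=5)
# centroid: (5, 4) cluster centres; label: (500,) assignment per row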
def init_hypers(self, x_train=None, key_suffix=''):
    """Initialise the kernel hyperparameters and inducing inputs, either
    from defaults (when no training inputs are given) or from the data via
    k-means/random subsampling and the median distance heuristic.

    Args:
        x_train (ndarray, optional): Training inputs.
        key_suffix (str, optional): Suffix appended to parameter keys.

    Returns:
        dict: Initial parameter values.
    """
    # dict to hold hypers, inducing points and parameters of q(U)
    N = self.N
    M = self.M
    Din = self.Din
    Dout = self.Dout
    if x_train is None:
        # No data: mildly perturbed unit lengthscales and a regular grid
        # of inducing inputs on [-1, 1].
        ls = np.log(np.ones((Din, )) + 0.1 * np.random.rand(Din, ))
        sf = np.log(np.array([1]))
        zu = np.tile(np.linspace(-1, 1, M).reshape((M, 1)), (1, Din))
    else:
        if N < 10000:
            centroids, label = kmeans2(x_train, M, minit='points')
        else:
            randind = np.random.permutation(N)
            centroids = x_train[randind[0:M], :]
        zu = centroids
        if N < 10000:
            X1 = np.copy(x_train)
        else:
            randind = np.random.permutation(N)
            X1 = x_train[randind[:5000], :]
        x_dist = cdist(X1, X1, 'euclidean')
        triu_ind = np.triu_indices(X1.shape[0])
        ls = np.zeros((Din, ))
        d2imed = np.median(x_dist[triu_ind])
        for i in range(Din):
            ls[i] = np.log(d2imed + 1e-16)
        sf = np.log(np.array([0.5]))
    params = dict()
    params['sf' + key_suffix] = sf
    params['ls' + key_suffix] = ls
    params['zu' + key_suffix] = zu
    return params
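# The inducing-input selection above (k-means centroids for small N, a random
# subset otherwise) is a common sparse-GP pattern. A standalone sketch of just
# that step; the function name and the 10000-point threshold default are
# lifted from the code above as assumptions.
import numpy as np
from scipy.cluster.vq import kmeans2

def select_inducing_inputs(x_train, M, threshold=10000):
    N = x_train.shape[0]
    if N < threshold:
        zu, _ = kmeans2(x_train, M, minit='points')  # centroids as inducing inputs
    else:
        zu = x_train[np.random.permutation(N)[:M], :]  # cheap random proxy
    return zu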
def bounding_ellipsoids(points, pointvol=0., vol_dec=0.5, vol_check=2.):
    """
    Calculate a set of ellipsoids that bound the collection of points.

    Parameters
    ----------
    points : `~numpy.ndarray` with shape (npoints, ndim)
        A set of coordinates.

    pointvol : float, optional
        Volume represented by a single point. When provided,
        used to set a minimum bound on the ellipsoid volume
        as `npoints * pointvol`. Default is `0.`.

    vol_dec : float, optional
        The required fractional reduction in volume after splitting an
        ellipsoid in order to accept the split. Default is `0.5`.

    vol_check : float, optional
        The factor used when checking whether the volume of the
        original bounding ellipsoid is large enough to warrant more
        trial splits via `ell.vol > vol_check * npoints * pointvol`.
        Default is `2.0`.

    Returns
    -------
    mell : :class:`MultiEllipsoid` object
        The :class:`MultiEllipsoid` object used to bound the
        collection of points.

    """
    if not HAVE_KMEANS:
        raise ValueError("scipy.cluster.vq.kmeans2 is required to compute "
                         "ellipsoid decompositions.")  # pragma: no cover

    # Calculate the bounding ellipsoid for the points, possibly
    # enlarged to a minimum volume.
    ell = bounding_ellipsoid(points, pointvol=pointvol)

    # Recursively split the bounding ellipsoid until the volume of each
    # split no longer decreases by a factor of `vol_dec`.
    ells = _bounding_ellipsoids(points, ell, pointvol=pointvol,
                                vol_dec=vol_dec, vol_check=vol_check)

    return MultiEllipsoid(ells=ells)
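# HAVE_KMEANS is defined elsewhere in the module; a plausible sketch of the
# import guard it implies (an assumption; the actual module may differ):
try:
    from scipy.cluster.vq import kmeans2
    HAVE_KMEANS = True
except ImportError:  # pragma: no cover
    HAVE_KMEANS = False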