def kmeans_numpy(d, headers, K, whiten=True):
    # assign to A the result of getting the data from your Data object
    A = d.get_data(headers)
    # assign to W the result of calling vq.whiten on A
    W = vq.whiten(A)
    # assign to codebook, bookerror the result of calling vq.kmeans with W and K
    codebook, bookerror = vq.kmeans(W, K)
    # assign to codes, error the result of calling vq.vq with W and the codebook
    codes, error = vq.vq(W, codebook)
    # return codebook, codes, and error
    return codebook, codes, error
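# A minimal standalone sketch of the same whiten -> kmeans -> vq pipeline on
# synthetic data (the imports and the random data are illustrative only, not
# part of the original project):
import numpy as np
import scipy.cluster.vq as vq

A = np.random.rand(100, 3)               # 100 observations, 3 features
W = vq.whiten(A)                         # rescale each column to unit variance
codebook, bookerror = vq.kmeans(W, 4)    # 4 cluster centers, mean distortion
codes, error = vq.vq(W, codebook)        # index of the nearest center per row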
# prep the k-means clustering algorithm by getting initial cluster means
def vector_quantize(data_dict, vs, bins):
    codebooks = {}
    vq_data = {}
    for size in vs.keys():
        all_size_data = []
        for disease in vs[size]:
            all_size_data.extend(data_dict[disease])
        #whitened = sp.whiten(all_size_data)
        #codebooks[size] = sp.kmeans(whitened, bins)[0]
        codebooks[size] = sp.kmeans(np.asarray(all_size_data), bins)[0]
    pickle.dump(codebooks, open("all_codebooks.pkl", "wb"))
    for dis in data_dict.keys():
        n = len(data_dict[dis])
        m = len(data_dict[dis][0])
        vq_data[dis] = map(str, sp.vq(np.reshape(data_dict[dis], (n, m)),
                                      codebooks[len(data_dict[dis][0])])[0])
    return vq_data
def build(self, A, categories, K=None):
    '''Builds the classifier given the data points in A and the categories.'''
    # figure out how many categories there are and get the mapping (np.unique)
    unique, mapping = np.unique(np.array(categories.T), return_inverse=True)
    self.num_classes = len(unique)
    self.num_features = A.shape[0]
    self.categories = categories
    # for each category i, build the set of exemplars
    for i in range(self.num_classes):
        if K is None:
            self.exemplars.append(A[(mapping == i), :])
        else:
            # vq.kmeans returns (codebook, distortion); only the codebook is kept
            codebook, distortion = vq.kmeans(A[(mapping == i), :], K)
            self.exemplars.append(codebook)
    return
def seperate_via_kmeans(state_space, p, K, tau=0.1):
    from scipy.cluster.vq import kmeans
    #centres = np.floor(kmeans2(state_space, K)[0])  # these are the original lines
    # the following are being added as hacks
    #_all_cores_filled_ = False
    #while(_all_cores_filled_ == False):
    #    centres, distributed = kmeans(state_space, K)
    #    print("going into k means" + "we only have " + str(np.max(distributed)))
    #    if np.max(distributed) == K-1:
    #        _all_cores_filled_ = True
    # hack to make just the k-means call work
    centres, stuff = kmeans(state_space, K)
    # hack ends here
    #proportions = partition_algo_distances(state_space, centres, tau)
    proportions = partition_algo_distances_tight(state_space, centres, tau)
    sub_state_space, sub_prob = seperate_via_proportions(state_space, proportions, p)
    return sub_state_space, sub_prob, centres
# test_clustering.py — project: pymatgen-diffusion, author: materialsvirtuallab
def test_cluster(self):
    data = np.random.uniform(size=(10, 5))
    data = list(data)
    d2 = np.random.uniform(size=(10, 5)) + ([5] * 5)
    data.extend(list(d2))
    d2 = np.random.uniform(size=(10, 5)) + ([-5] * 5)
    data.extend(list(d2))
    data = np.array(data)
    k = Kmeans()
    clusters = []
    for i in range(10):
        clusters.append(k.cluster(data, 3))
    c1, l1, ss = min(clusters, key=lambda d: d[2])
    c2, d = kmeans(data, 3)
    same = False
    for a in itertools.permutations(c2):
        if np.allclose(c1, a):
            same = True
            break
    self.assertTrue(same)
def kmeans(X, K):
    km = KMeans(K).fit(X)
    return km.cluster_centers_

def kmeans(X, K):
    return vq.kmeans(X, K)[0]
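# The two wrappers above are interchangeable: both return an array of K cluster
# centers, one via scikit-learn's KMeans and one via scipy.cluster.vq.kmeans.
# A small usage sketch on synthetic data (illustrative only):
import numpy as np
from sklearn.cluster import KMeans
from scipy.cluster import vq

X = np.random.rand(50, 2)
centers_sklearn = KMeans(3).fit(X).cluster_centers_   # shape (3, 2)
centers_scipy = vq.kmeans(X, 3)[0]                     # typically shape (3, 2)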
def kmeans_net(net, layers, num_c=16, initials=None):
    # net: the (Caffe) network whose weights are clustered
    # layers: names of the layers to quantize
    # num_c: number of codebook entries per layer (int or list)
    # initials: initial cluster centers (None, ndarray, or 'random')
    codebook = {}  # per-layer codebooks
    if type(num_c) == type(1):
        num_c = [num_c] * len(layers)
    else:
        assert len(num_c) == len(layers)
    # run k-means on the weights of each layer
    print "==============Perform K-means============="
    for idx, layer in enumerate(layers):
        print "Eval layer:", layer
        W = net.params[layer][0].data.flatten()
        W = W[np.where(W != 0)]  # drop zero-valued weights
        # choose how the initial cluster centers are obtained
        if initials is None:  # Default: uniform sample
            min_W = np.min(W)
            max_W = np.max(W)
            initial_uni = np.linspace(min_W, max_W, num_c[idx] - 1)
            codebook[layer], _ = scv.kmeans(W, initial_uni)
        elif type(initials) == type(np.array([])):
            codebook[layer], _ = scv.kmeans(W, initials)
        elif initials == 'random':
            codebook[layer], _ = scv.kmeans(W, num_c[idx] - 1)
        else:
            raise Exception
        # re-insert 0.0 as the first codebook entry
        codebook[layer] = np.append(0.0, codebook[layer])
        print "codebook size:", len(codebook[layer])
    return codebook
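# kmeans_net only builds the per-layer codebooks. Actually quantizing a weight
# array with one of those codebooks could look like the following sketch; the
# helper name quantize_layer is made up for illustration:
import numpy as np
import scipy.cluster.vq as scv

def quantize_layer(weights, layer_codebook):
    # hypothetical helper: snap every weight to its nearest codebook entry
    flat = weights.flatten().reshape(-1, 1)
    labels, _ = scv.vq(flat, layer_codebook.reshape(-1, 1))
    return layer_codebook[labels].reshape(weights.shape)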
def get_palette(samples, options, return_mask=False, kmeans_iter=40):
    '''Extract the palette for the set of sampled RGB values. The first
    palette entry is always the background color; the rest are determined
    from foreground pixels by running K-means clustering. Returns the
    palette, as well as a mask corresponding to the foreground pixels.
    '''
    if not options.quiet:
        print('  getting palette...')
    bg_color = get_bg_color(samples, 6)
    fg_mask = get_fg_mask(bg_color, samples, options)
    centers, _ = kmeans(samples[fg_mask].astype(np.float32),
                        options.num_colors - 1,
                        iter=kmeans_iter)
    palette = np.vstack((bg_color, centers)).astype(np.uint8)
    if not return_mask:
        return palette
    else:
        return palette, fg_mask
######################################################################
def relevant_moods(song):
    """
    :param song: single song document, taken from a previously filled queue
    :return moods: list of relevant moods
    """
    all_moods = song['moods']
    if len(all_moods) == 0:
        return None
    ordered_score_moods = sorted(all_moods.items(), key=lambda x: x[1], reverse=True)
    # pprint(ordered_score_moods)
    ordered_scores = sorted(all_moods.values(), reverse=True)
    # print(ordered_scores)
    features = np.asarray(ordered_scores)
    codebook, distortion = kmeans(features, 2)
    codebook = sorted(codebook.tolist(), reverse=True)
    moods = []
    for m in ordered_score_moods:
        mood = m[0]
        score = m[1]
        if len(codebook) > 1:
            if abs(score - codebook[0]) < abs(score - codebook[1]):
                moods.append(mood)
            else:
                continue
        else:
            moods.append(mood)
    # print(moods)
    return moods
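# relevant_moods uses a 2-cluster k-means on the 1-D score vector as an adaptive
# threshold: moods whose score is closer to the higher cluster center are kept.
# A standalone illustration with made-up scores (reshaped to a column so the
# scipy call is unambiguous):
import numpy as np
from scipy.cluster.vq import kmeans

scores = np.array([0.9, 0.85, 0.8, 0.2, 0.15, 0.1])
centers = sorted(kmeans(scores.reshape(-1, 1), 2)[0].ravel().tolist(), reverse=True)
kept = [s for s in scores if abs(s - centers[0]) < abs(s - centers[1])]
# kept -> the three high scores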
# ### Querying the DB to retrieve the songs' moods
# create a shared queue for all songs
def kmeans(x, k):
    centroids, dist = _kmeans(x, k)
    idx, _ = vq(x, centroids)
    return idx, centroids, dist
def kmeans(d, headers, K, metric, whiten=True, categories=None):
    '''Takes in a Data object, a set of headers, and the number of clusters to create.
    Computes and returns the codebook, codes and representation errors.
    If given an Nx1 matrix of categories, it uses the category labels
    to calculate the initial cluster means.
    '''
    # assign to A the result of getting the data given the headers
    try:
        A = d.get_data(headers)
    except AttributeError:
        A = d
    if whiten:
        W = vq.whiten(A)
    else:
        W = A
    codebook = kmeans_init(W, K, categories)
    # assign to codebook, codes, errors the result of calling kmeans_algorithm with W and codebook
    codebook, codes, errors = kmeans_algorithm(W, codebook, metric)
    # return the codebook, codes, and representation error
    return codebook, codes, errors
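# The helper kmeans_init referenced above is not shown. A minimal sketch of what
# it might look like, assuming categories (when given) is an Nx1 matrix of
# integer labels and the initial means are the per-category means, otherwise
# K randomly chosen observations (hypothetical, for illustration only):
import numpy as np

def kmeans_init(W, K, categories=None):
    if categories is None:
        # pick K distinct observations at random as the initial means
        idx = np.random.choice(W.shape[0], K, replace=False)
        return W[idx, :]
    labels = np.asarray(categories).flatten()
    # one initial mean per category label: the mean of that category's rows
    return np.vstack([W[labels == c].mean(axis=0) for c in np.unique(labels)])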
# test function
def cluster(matrix):
    whitened = whiten(matrix.todense())
    # for x in range(25, 40):
    #     means, distortion = kmeans(whitened, x)
    #     print distortion
    means, distortion = kmeans(whitened, 30)
    # pickle.dump(means, open('30means-' + sys.argv[1] + '.pkl', 'wb'))
    return means, distortion
def k_means(points, k, **kwargs):
    '''
    Find k centroids that attempt to minimize the k-means problem:
    https://en.wikipedia.org/wiki/Metric_k-center

    Arguments
    ----------
    points:   (n, d) set of points
    k:        int, number of centroids to compute
    **kwargs: passed directly to scipy.cluster.vq.kmeans

    Returns
    ----------
    centroids: (k, d) set of points
    labels:    (n) set of indexes for which points belong to which centroid
    '''
    from scipy.cluster.vq import kmeans
    from scipy.spatial import cKDTree

    points = np.asanyarray(points)
    points_std = points.std(axis=0)
    whitened = points / points_std
    centroids_whitened, distortion = kmeans(whitened, k, **kwargs)
    centroids = centroids_whitened * points_std

    tree = cKDTree(centroids)
    labels = tree.query(points, k=1)[1]
    return centroids, labels
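# A quick usage sketch for k_means; the point data here is illustrative only:
import numpy as np

# two well-separated blobs of 2-D points
pts = np.vstack([np.random.randn(50, 2), np.random.randn(50, 2) + 10.0])
centroids, labels = k_means(pts, 2)
print(centroids.shape)   # (2, 2)
print(labels.shape)      # (100,)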
def find_dominant_colors(image):
    """Cluster the colors of the image in CLUSTER_NUMBER of clusters. Returns
    an array of dominant colors reverse sorted by cluster size.
    """
    array = img_as_float(fromimage(image))
    # Reshape from MxNx4 to Mx4 array
    array = array.reshape(scipy.product(array.shape[:2]), array.shape[2])
    # Remove transparent pixels if any (channel 4 is alpha)
    if array.shape[-1] > 3:
        array = array[array[:, 3] == 1]
    # Finding centroids (centroids are colors)
    centroids, _ = kmeans(array, CLUSTER_NUMBER)
    # Allocate pixel to a centroid cluster
    observations, _ = vq(array, centroids)
    # Calculate the number of pixels in a cluster
    histogram, _ = scipy.histogram(observations, len(centroids))
    # Sort centroids by number of pixels in their cluster
    sorted_centroids = sorted(zip(centroids, histogram),
                              key=lambda x: x[1],
                              reverse=True)
    sorted_colors = tuple((couple[0] for couple in sorted_centroids))
    return sorted_colors
def run_kmeans(img):
    """Run kmeans and plot result."""
    features, shape = get_features(img)
    classified = kmeans_classify(features, shape)
    indices, num_objs = label(classified, shape)
    plot_classes(indices, num_objs)
    globpos = find_point_objects(img.lat, img.lon, indices, num_objs)
    return globpos
def kmeans_classify(features, shape, label=True, fill=False):
    """Run the k-means algorithm."""
    print("Starting kmeans")
    whitened = whiten(features)
    init = np.array((whitened.min(0), whitened.mean(0), whitened.max(0)))
    codebook, _ = kmeans(whitened, init)
    classified, _ = vq(whitened, codebook)
    print("Finished kmeans")
    return classified
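# Passing an array instead of an integer as the second argument of
# scipy.cluster.vq.kmeans uses its rows as the initial centroids, which is what
# kmeans_classify does with the per-feature min/mean/max. A minimal standalone
# illustration on random data:
import numpy as np
from scipy.cluster.vq import kmeans, vq, whiten

data = whiten(np.random.rand(200, 3))
# three deterministic starting centroids instead of an integer k
init = np.array((data.min(0), data.mean(0), data.max(0)))
codebook, distortion = kmeans(data, init)
labels, _ = vq(data, codebook)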
def k_means_clustering(instance_array, n_clusters=9, sin_cos=1, number_of_starts=30, seed=None, use_scikit=1, **kwargs):
    '''
    This runs the k-means clustering algorithm as implemented in scipy - change to scikit-learn?
    SH: 7May2013
    '''
    from sklearn.cluster import KMeans
    print 'starting kmeans algorithm, k=%d, retries : %d, sin_cos = %d'%(n_clusters, number_of_starts, sin_cos)
    if sin_cos == 1:
        print '  using sine and cosine of the phases'
        sin_cos_instances = np.zeros((instance_array.shape[0], instance_array.shape[1]*2), dtype=float)
        sin_cos_instances[:, ::2] = np.cos(instance_array)
        sin_cos_instances[:, 1::2] = np.sin(instance_array)
        input_array = sin_cos_instances
        #code_book, distortion = vq.kmeans(sin_cos_instances, n_clusters, iter=number_of_starts)
        #cluster_assignments, point_distances = vq.vq(sin_cos_instances, code_book)
    else:
        print '  using raw phases'
        input_array = instance_array
        #code_book, distortion = vq.kmeans(instance_array, n_clusters, iter=number_of_starts)
        #cluster_assignments, point_distances = vq.vq(instance_array, code_book)
    #pickle.dump(multiple_run_results, file(k_means_output_filename, 'w'))
    if use_scikit:
        print 'using scikit learn'
        tmp = KMeans(init='k-means++', n_clusters=n_clusters, n_init=number_of_starts, n_jobs=1, random_state=seed)
        cluster_assignments = tmp.fit_predict(input_array)
        code_book = tmp.cluster_centers_
    else:
        print 'using vq from scipy'
        code_book, distortion = vq.kmeans(input_array, n_clusters, iter=number_of_starts)
        cluster_assignments, point_distances = vq.vq(input_array, code_book)
    if sin_cos:
        cluster_details = {'k_means_centroids_sc': code_book}
    else:
        cluster_details = {'k_means_centroids': code_book}
    return cluster_assignments, cluster_details
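# The sin_cos option embeds each angular feature as its (cos, sin) pair so that
# Euclidean k-means respects the 2*pi periodicity of phase data. A tiny
# standalone illustration of that embedding (not part of the original code):
import numpy as np

phases = np.array([[0.1, 3.0], [6.2, -3.1]])   # radians; each column nearly the same angle mod 2*pi
embedded = np.zeros((phases.shape[0], phases.shape[1]*2))
embedded[:, ::2] = np.cos(phases)
embedded[:, 1::2] = np.sin(phases)
# the two rows are now close in Euclidean distance, even though the raw phases look far apart
print(np.linalg.norm(embedded[0] - embedded[1]))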
##################################################################################
#############################k-means periodic algorithm##############################
def fetch_fruitspeech_softmax():
    fs, d, wav_names = fetch_sample_speech_fruit()

    def matcher(name):
        return name.split("/")[1]

    classes = [matcher(wav_name) for wav_name in wav_names]
    all_chars = [c for c in sorted(list(set("".join(classes))))]
    char2code = {v: k for k, v in enumerate(all_chars)}
    vocabulary_size = len(char2code.keys())
    y = []
    for n, cl in enumerate(classes):
        y.append(tokenize_ind(cl, char2code))
    # Is it kosher to kmeans on all the data?
    X, _apply, _re = apply_lpc_softmax_preproc(d)
    """
    for n, Xi in enumerate(X[::8]):
        di = _re(Xi)
        wavfile.write("t_%i.wav" % n, fs, soundsc(di))
    raise ValueError()
    """
    speech = {}
    speech["vocabulary_size"] = vocabulary_size
    speech["vocabulary"] = char2code
    speech["sample_rate"] = fs
    speech["data"] = X
    speech["target"] = y
    speech["reconstruct"] = _re
    return speech
def kmeans_clustering(vectorLayer, attributesList, normalize, clusterNumber, outputFieldName):
    from scipy.cluster.vq import kmeans, vq
    from numpy import array
    fullObjectsList = []
    features = vectorLayer.getFeatures()
    for feature in features:
        fullObjectsList.append([])
        for attribute in attributesList:
            if feature[attribute[0]]:
                fullObjectsList[len(fullObjectsList)-1].append(feature[attribute[0]])
            else:
                fullObjectsList[len(fullObjectsList)-1].append(0)
    # NORMALIZING
    if normalize:
        i = 0
        maxValues = []
        while i < len(attributesList):
            maxValues.append(max(abs(item[i]) for item in fullObjectsList))
            i += 1
        j = 0
        while j < len(fullObjectsList):
            i = 0
            while i < len(fullObjectsList[j]):
                fullObjectsList[j][i] = (fullObjectsList[j][i] * 1.0) / (maxValues[i] * 1.0)
                i += 1
            j += 1
    data = array(fullObjectsList)
    centroids, _ = kmeans(data, clusterNumber, 25)
    idx, _ = vq(data, centroids)
    idx = idx.tolist()
    vectorLayerDataProvider = vectorLayer.dataProvider()
    # Create the field if it does not exist
    if vectorLayer.fieldNameIndex(outputFieldName) == -1:
        vectorLayerDataProvider.addAttributes([QgsField(outputFieldName, QVariant.Int)])
        vectorLayer.updateFields()
    vectorLayer.startEditing()
    attrIdx = vectorLayer.fieldNameIndex(outputFieldName)
    features = vectorLayer.getFeatures()
    i = 0
    for feature in features:
        vectorLayer.changeAttributeValue(feature.id(), attrIdx, int(idx[i]))
        i += 1
    vectorLayer.updateFields()
    vectorLayer.commitChanges()