python类AffinityPropagation()的实例源码

analyze_predictions.py 文件源码 项目:CS-SMAF 作者: brian-cleary 项目源码 文件源码 阅读 26 收藏 0 点赞 0 评论 0
def compare_clusters(X,Y,method='spectral',s=10000):
    """Score how similarly the columns of X and Y cluster.

    The columns of each input are scaled to unit L2 norm, at most ``s`` of
    them are drawn at random (the same columns for both inputs), an affinity
    ``exp(-d**2 / 2)`` is built from the cosine distance ``d`` between the
    selected columns, and each affinity matrix is clustered independently.

    Parameters
    ----------
    X, Y : 2-D arrays; the same boolean column mask is applied to both, so
        they must have the same number of columns.
    method : 'spectral' (SpectralClustering with a cluster count derived from
        the number of columns of X) or 'ap' (AffinityPropagation).
    s : maximum number of columns to sample.

    Returns
    -------
    The adjusted mutual information between the two label vectors.

    Raises
    ------
    ValueError
        If ``method`` is not one of the supported names.
    """
    if method not in ('spectral', 'ap'):
        raise ValueError("method must be 'spectral' or 'ap', got %r" % (method,))
    A = (X/np.linalg.norm(X,axis=0)).T
    # all-zero columns divide by a zero norm and become NaN; treat them as zero vectors
    A[np.isnan(A)] = 0
    B = (Y/np.linalg.norm(Y,axis=0)).T
    B[np.isnan(B)] = 0
    # builtin bool: the np.bool alias no longer exists in current NumPy
    random_samples = np.zeros(A.shape[0],dtype=bool)
    random_samples[:min(s,A.shape[0])] = True
    np.random.shuffle(random_samples)
    A = A[random_samples]
    B = B[random_samples]
    dA = 1 - A.dot(A.T)
    dA = np.exp(-dA**2/2.)
    dB = 1 - B.dot(B.T)
    dB = np.exp(-dB**2/2.)
    del A,B
    if method == 'spectral':
        # floor division keeps n_clusters an integer on Python 3 as well
        n = max(5,min(30,X.shape[1]//50))
        lA = SpectralClustering(n_clusters=n,affinity='precomputed').fit_predict(dA)
        lB = SpectralClustering(n_clusters=n,affinity='precomputed').fit_predict(dB)
    else:
        lA = AffinityPropagation(affinity='precomputed').fit_predict(dA)
        lB = AffinityPropagation(affinity='precomputed').fit_predict(dB)
    return adjusted_mutual_info_score(lA,lB)
mathutil.py 文件源码 项目:QR-Replace 作者: Metruption 项目源码 文件源码 阅读 20 收藏 0 点赞 0 评论 0
def constructParallelograms(dataset):
    '''
    Cluster ``dataset`` with affinity propagation and extrapolate a
    parallelogram from the first three cluster centres.

    @params
        dataset is a list of points to find clusters in
    returns the result of extrapolateParallelogram applied to the first
    three centres, each wrapped in a Point built from its first two
    coordinates. Fewer than three clusters raises IndexError.
    '''
    model = AffinityPropagation().fit(dataset)
    centres = model.cluster_centers_
    print(centres, model.labels_, len(centres))

    coordinate_lists = (centre.tolist() for centre in centres)
    clusters = [Point(coords[0], coords[1]) for coords in coordinate_lists]

    print(clusters)
    return extrapolateParallelogram(clusters[0], clusters[1], clusters[2])
test_base.py 文件源码 项目:yellowbrick 作者: DistrictDataLabs 项目源码 文件源码 阅读 25 收藏 0 点赞 0 评论 0
def test_clusterer_enforcement(self):
        """
        The clustering score visualizer must reject non-clustering estimators
        and accept clustering estimators.
        """
        rejected = (
            SVC, SVR, Ridge, RidgeCV, LinearRegression, RandomForestClassifier,
        )
        for estimator_cls in rejected:
            with self.assertRaises(YellowbrickTypeError):
                ClusteringScoreVisualizer(estimator_cls())

        accepted = (
            KMeans, MiniBatchKMeans, AffinityPropagation, MeanShift, DBSCAN, Birch,
        )
        for estimator_cls in accepted:
            try:
                ClusteringScoreVisualizer(estimator_cls())
            except YellowbrickTypeError:
                self.fail("could not pass clustering estimator to visualizer")
cluster.py 文件源码 项目:lol-category 作者: vonum 项目源码 文件源码 阅读 24 收藏 0 点赞 0 评论 0
def affinity_prop(data):
  """Fit affinity propagation on ``data`` and print a short report.

  Prints a header line, the silhouette score of the resulting labelling and
  a Counter of how many samples landed in each cluster. Returns nothing.
  """
  af = AffinityPropagation(damping=0.5, convergence_iter=15, affinity='euclidean').fit(data)
  # single-argument print(...) behaves the same on Python 2 and Python 3
  print('Affinity Propagation')
  print(metrics.silhouette_score(data, af.labels_))
  print(collections.Counter(af.labels_))

# mean_shift(np.array(data))
# affinity_prop(np.array(data))
test_sap.py 文件源码 项目:pysapc 作者: bioinfocao 项目源码 文件源码 阅读 19 收藏 0 点赞 0 评论 0
def clusterSimilarityWithSklearnAPC(data_file,damping=0.9,max_iter=200,convergence_iter=15,preference='min'):
    """
    Compare the Sparse Affinity Propagation (SAP) result with the scikit-learn
    Affinity Propagation (AP) result on the same similarity matrix.

    Note that the convergence condition for scikit-learn AP is "no change in
    the number of estimated clusters", while for SAP it is "no change in the
    cluster assignment". SAP may therefore take more iterations, and the final
    cluster assignment (the exemplar chosen for each sample) may differ slightly.

    Parameters
    ----------
    data_file : path handed to loadMatrix; the loaded matrix is densified
        with ``todense()``.
    damping, max_iter, convergence_iter : passed to both algorithms so that
        they run under the same settings.
    preference : 'min' or 'median' selects that statistic of the dense
        similarity matrix; any other value is passed through unchanged.

    Returns
    -------
    The value of sparseAP_cy.arrSamePercent for the two exemplar arrays.
    """
    # loading data
    simi_mat=loadMatrix(data_file)
    simi_mat_dense=simi_mat.todense()

    # get preference
    if preference=='min':
        preference=np.min(simi_mat_dense)
    elif preference=='median':
        preference=np.median(simi_mat_dense)

    print('{0}, start SKlearn Affinity Propagation'.format(datetime.now()))
    # max_iter / convergence_iter are forwarded so that sklearn runs with the
    # same iteration limits as SAP below instead of its own defaults
    af=AffinityPropagation(damping=damping, max_iter=max_iter, convergence_iter=convergence_iter,
                           preference=preference, affinity='precomputed', verbose=True)
    af.fit(simi_mat_dense)
    cluster_centers_indices,labels = af.cluster_centers_indices_,af.labels_
    # translate each sample's cluster label into the index of that cluster's exemplar
    sk_exemplars=np.asarray([cluster_centers_indices[i] for i in labels])
    print('{0}, start Fast Sparse Affinity Propagation Cluster'.format(datetime.now()))
    sap=SAP(preference=preference,convergence_iter=convergence_iter,max_iter=max_iter,damping=damping,verboseIter=100)
    sap_exemplars=sap.fit_predict(simi_mat_dense)

    # Calculate similarity between sk_exemplars and sap_exemplars
    exemplars_similarity=sparseAP_cy.arrSamePercent(np.array(sk_exemplars), np.array(sap_exemplars))

    return exemplars_similarity
test_sap.py 文件源码 项目:pysapc 作者: bioinfocao 项目源码 文件源码 阅读 23 收藏 0 点赞 0 评论 0
def testDense():
    """
    Run the SAP-versus-scikit-learn comparison on the dense similarity matrix
    stored next to this file and print the resulting exemplar similarity.
    """
    here = os.path.dirname(os.path.abspath(__file__))
    dense_similarity_matrix_file = os.path.join(here, 'FaceClusteringSimilarities.txt')
    exemplars_similarity = clusterSimilarityWithSklearnAPC(
        data_file=dense_similarity_matrix_file,
        damping=0.9,
        max_iter=200,
        convergence_iter=15,
        preference='min',
    )
    report = "Exemplar label similarity between sklearn.cluster.AffinityPropagation and SAP is: {0}"
    print(report.format(exemplars_similarity))
sklearn_basic.py 文件源码 项目:base_function 作者: Rockyzsu 项目源码 文件源码 阅读 22 收藏 0 点赞 0 评论 0
def affinity(fig):
    """Cluster the module-level ``X_iris`` with affinity propagation and plot it.

    A 3-D subplot is added to ``fig`` at position ``geo + 3`` (``geo`` is a
    module-level value) and every sample is drawn using its first three
    features, coloured by its cluster label modulo 7.

    Returns the array of cluster labels.
    """
    palette = 'bgrcmyk'
    ax = fig.add_subplot(geo + 3, projection='3d', title='affinity')
    model = cluster.AffinityPropagation(preference=-50)
    model.fit(X_iris)
    res = model.labels_
    for sample, label in zip(X_iris, res):
        ax.scatter(*sample[: 3], c=palette[label % 7], marker='o')

    for setter, text in ((ax.set_xlabel, 'X Label'),
                         (ax.set_ylabel, 'Y Label'),
                         (ax.set_zlabel, 'Z Label')):
        setter(text)
    return res
affinity_mapping.py 文件源码 项目:word2vec_pipeline 作者: NIHOPA 项目源码 文件源码 阅读 21 收藏 0 点赞 0 评论 0
def compute_local_affinity(V):
    """Cluster the rows of ``V`` on their pairwise cosine distances.

    A clusterer is built from the module-level ``cluster_clf`` factory with
    the module-level ``damping`` value and fitted on the cosine distance
    matrix of ``V``.

    Returns the tuple ``(V, labels)``.
    """
    clusterer = cluster_clf(damping=damping)
    pairwise = cdist(V, V, metric='cosine')
    z_labels = clusterer.fit_predict(pairwise)
    return V, z_labels
affinity_mapping.py 文件源码 项目:word2vec_pipeline 作者: NIHOPA 项目源码 文件源码 阅读 20 收藏 0 点赞 0 评论 0
def compute_affinity(item):
    """Cluster the distinct known tokens of one document.

    Parameters
    ----------
    item : 4-tuple ``(text, f_idx, table_name, f_sql)``; ``table_name`` is
        unpacked but not used here.

    Returns
    -------
    ``(f_idx, f_sql, data)`` where ``data`` holds ``"token_clf_index"`` (the
    ``M.word2index`` value of every distinct token found in ``M``) and
    ``"y_labels"`` (the cluster label assigned to each of those tokens, in
    the same order).

    Raises
    ------
    ValueError
        If none of the document's tokens is present in ``M``.
    """
    text, f_idx, _table_name, f_sql = item

    # Distinct tokens that the module-level model M knows about
    labels = np.array(list({w for w in text.split() if w in M}))

    # Fail before doing any lookups when there is nothing to cluster
    if not labels.size:
        msg = "Document has no valid tokens! This is problem."
        raise ValueError(msg)

    token_clf_index = np.array([M.word2index[w] for w in labels])

    V = np.array([M[w] for w in labels])
    DV = cdist(V, V, metric='cosine')

    # Values are sometimes "slightly" less than zero due to rounding
    DV[DV < 0] = 0

    cluster = cluster_clf(damping=damping)
    y_labels = cluster.fit_predict(DV)

    data = {
        "token_clf_index": token_clf_index,
        "y_labels": y_labels,
    }

    return f_idx, f_sql, data
analyze_clustering.py 文件源码 项目:indefinite-pronouns 作者: dnrb 项目源码 文件源码 阅读 21 收藏 0 点赞 0 评论 0
def get_cluster_assignments(sim_matrix, parameters):
    """
    (np.array, list of str) -> list of int
    sim_matrix: list of list of float -- similarity matrix between exemplars
    parameters: list of strings in the format ["method:method_name",
            "algo:algo_name", "k:num_clusters", "damping:damping"]
            where order doesn't matter. An entry is recognised by its
            prefix and its value is the text between the first and second
            ':'. Missing entries fall back to the defaults
            algo='ap', method='single', k=8, damping=0.5.

            algo:    one of 'hierarchical', 'kmeans', 'ap', 'ward'
            method:  linkage method, e.g. 'single', 'complete', 'average'
                     (only used for hierarchical clustering)
            k:       number of clusters, an integer <= the data length
                     (only used for kmeans)
            damping: float in [0.5, 1] (only used for ap)

    Returns a list of integers. The integer at each index of the list corresponds
    to the cluster number of the exemplar at the same index in sim_matrix.

    Raises ValueError when the requested algorithm is not supported.
    """

    def _param(prefix, default, cast=str):
        # The first entry that starts with `prefix` wins.
        for entry in parameters:
            if entry.startswith(prefix):
                return cast(entry.split(':')[1])
        return default

    algorithm = _param('algo', 'ap')
    method = _param('method', 'single')
    kMk = _param('k', 8, int)
    # The old check compared a 4-character slice to the 7-character word
    # 'damping', so a supplied damping was never picked up (nor used).
    damping = _param('damping', 0.5, float)

    if algorithm == 'hierarchical':
        clustering = hierarchy.linkage(sim_matrix, method)
        k = get_k(clustering, 20)
        # fcluster labels start at 1; shift so cluster numbers start at 0
        cluster_assignments = hierarchy.fcluster(clustering, k, criterion = 'maxclust')-1
    elif algorithm == 'kmeans':
        cluster_assignments = KMeans(n_clusters = kMk).fit_predict(sim_matrix)
    elif algorithm == 'ap':
        cluster_assignments = AffinityPropagation(damping = damping).fit_predict(sim_matrix)
    elif algorithm == 'ward':
        clustering = hierarchy.ward(sim_matrix)
        k = get_k(clustering, 20)
        cluster_assignments = hierarchy.fcluster(clustering, k, criterion = 'maxclust')-1
    else:
        raise ValueError("unknown clustering algorithm: %r" % (algorithm,))
    return cluster_assignments
evaluate.py 文件源码 项目:w2vec-similarity 作者: jayantj 项目源码 文件源码 阅读 28 收藏 0 点赞 0 评论 0
def affinity_propagation_clusters(similarity_matrix):
  """Fit affinity propagation on a precomputed similarity matrix.

  Returns whatever ``fit`` returns for the estimator.
  """
  estimator = AffinityPropagation(affinity='precomputed')
  return estimator.fit(similarity_matrix)
cluster.py 文件源码 项目:Particle-Picking-Cryo-EM 作者: hqythu 项目源码 文件源码 阅读 25 收藏 0 点赞 0 评论 0
def main():
    """Reduce the detected centres with k-means and score each stage.

    Centres are read from 'out_center.txt' and reference labels from
    '142-label.txt'. ``judge`` is called three times: on the raw centres, on
    the input centre nearest to each k-means centroid, and on the k-means
    centroids themselves. The number of k-means clusters is 18% of the number
    of input centres (truncated to an integer).
    """
    centers = get_list('out_center.txt')
    labels = get_list('142-label.txt')
    judge(centers, labels)
    n_class = int(len(centers) * 0.18)
    est = KMeans(n_clusters=n_class, max_iter=1000)
    est.fit(centers)
    new_list = []
    for x, y in est.cluster_centers_:
        # Snap the centroid to the closest original centre. min() keeps the
        # first of several equally close points and needs no sentinel value,
        # so a real point whose x coordinate is -1 is handled correctly.
        nearest_x, nearest_y = min(
            centers, key=lambda point: distance(x, y, *point))
        new_list.append([nearest_x, nearest_y])
    judge(new_list, labels)
    judge(est.cluster_centers_, labels)

    # db = DBSCAN(eps=0.3, min_samples=180).fit(centers)
    # print(db.core_sample_indices_)
    # judge(new_list, labels)
    # print(est.cluster_centers_)
    # save_list('result.txt', est.cluster_centers_)
    # af = AffinityPropagation(preference=180).fit(centers)
    # judge(af.cluster_centers_, labels)
sparse_affinity_propagation.py 文件源码 项目:icing 作者: slipguru 项目源码 文件源码 阅读 26 收藏 0 点赞 0 评论 0
def __init__(self, damping=.5, max_iter=200, convergence_iter=15,
                 copy=True, preference=None, affinity='euclidean',
                 verbose=False, convergence_percentage=0.999999):
        """Forward the standard options to the parent class and keep the extra one.

        Every argument except ``convergence_percentage`` is passed by keyword
        to the parent ``__init__``; ``convergence_percentage`` is stored on
        the instance.
        """
        parent_options = dict(
            damping=damping,
            max_iter=max_iter,
            convergence_iter=convergence_iter,
            copy=copy,
            verbose=verbose,
            preference=preference,
            affinity=affinity,
        )
        super(AffinityPropagation, self).__init__(**parent_options)
        self.convergence_percentage = convergence_percentage
sparse_affinity_propagation.py 文件源码 项目:icing 作者: slipguru 项目源码 文件源码 阅读 23 收藏 0 点赞 0 评论 0
def fit(self, X, **kwargs):
        """Apply affinity propagation clustering.

        Dense input is delegated unchanged to the parent class. Sparse input
        is converted to COO format and clustered with ``sparse_ap``; the
        affinity matrix is created from negative squared euclidean distances
        unless ``affinity`` is ``precomputed``.

        Parameters
        ----------
        X: array-like or sparse matrix,
                shape (n_samples, n_features) or (n_samples, n_samples)
            Data matrix or, if affinity is ``precomputed``, matrix of
            similarities / affinities.
        **kwargs
            Only forwarded to the parent ``fit`` on the dense path.

        Returns
        -------
        self, with ``affinity_matrix_``, ``cluster_centers_indices_``,
        ``labels_`` and ``n_iter_`` set on the sparse path, plus
        ``cluster_centers_`` (rows of X, in CSR format) when the affinity is
        not precomputed.

        Raises
        ------
        ValueError
            If ``affinity`` is neither 'precomputed' nor 'euclidean'.
        """
        if not issparse(X):
            return super(AffinityPropagation, self).fit(X, **kwargs)

        # Since X is sparse, this converts it in a coo_matrix if required
        X = check_array(X, accept_sparse='coo')
        if self.affinity == "precomputed":
            self.affinity_matrix_ = X
        elif self.affinity == "euclidean":
            self.affinity_matrix_ = coo_matrix(
                -euclidean_distances(X, squared=True))
        else:
            raise ValueError("Affinity must be 'precomputed' or "
                             "'euclidean'. Got %s instead"
                             % str(self.affinity))

        self.cluster_centers_indices_, self.labels_, self.n_iter_ = \
            sparse_ap(
                self.affinity_matrix_, self.preference, max_iter=self.max_iter,
                convergence_iter=self.convergence_iter, damping=self.damping,
                copy=self.copy, verbose=self.verbose, return_n_iter=True,
                convergence_percentage=self.convergence_percentage)

        if self.affinity != "precomputed":
            # ``X.data`` on a COO matrix is the flat array of stored values,
            # not the samples, and COO matrices cannot be row-indexed; select
            # the exemplar rows through a CSR view instead.
            self.cluster_centers_ = \
                X.tocsr()[self.cluster_centers_indices_].copy()

        return self
ClasteringCalculator.py 文件源码 项目:TextStageProcessor 作者: mhyhre 项目源码 文件源码 阅读 24 收藏 0 点赞 0 评论 0
def make_aa_clustering(self, short_filenames, input_texts):
        """Cluster the input texts with affinity propagation and report the result.

        Steps: create the 'affinity_propagation/' output directory if needed,
        optionally write the TF-IDF table, project the term counts to two
        normalised SVD components, cluster them, emit and save a per-cluster
        listing of ``short_filenames``, then draw the cluster plot.
        """
        output_dir = self.output_dir + 'affinity_propagation/'
        if not os.path.exists(output_dir):
            os.makedirs(output_dir)

        if self.need_tf_idf:
            self.signals.PrintInfo.emit("?????? TF-IDF...")
            msg = self.calculate_and_write_tf_idf(output_dir + 'tf_idf.csv', input_texts)
            self.signals.PrintInfo.emit(msg)

        term_counts = CountVectorizer().fit_transform(input_texts)

        # two-component SVD followed by normalisation of each sample
        lsa = make_pipeline(TruncatedSVD(2), Normalizer(copy=False))
        X = lsa.fit_transform(term_counts)

        aa_clusterizator = AffinityPropagation(damping=self.aa_damping,
                                               max_iter=self.aa_max_iter,
                                               convergence_iter=self.aa_no_change_stop)

        predict_result = aa_clusterizator.fit_predict(X)
        self.signals.PrintInfo.emit('\n??????? ?? ??????????:\n')

        # one header line per cluster, then the documents assigned to it
        report_parts = []
        for claster_index in range(max(predict_result) + 1):
            report_parts.append('??????? ' + str(claster_index) + ':\n')
            report_parts.extend(
                '  ' + str(document) + '\n'
                for predict, document in zip(predict_result, short_filenames)
                if predict == claster_index)
            report_parts.append('\n')
        clasters_output = ''.join(report_parts)

        clusters_path = output_dir + 'clusters.txt'
        self.signals.PrintInfo.emit(clasters_output)
        self.signals.PrintInfo.emit('????????? ?:' + str(clusters_path))
        writeStringToFile(clasters_output, clusters_path)

        self.draw_clusters_plot(X, predict_result, short_filenames)
time_freq.py 文件源码 项目:apicultor 作者: sonidosmutantes 项目源码 文件源码 阅读 20 收藏 0 点赞 0 评论 0
def find_number_of_sources(cosine_distance):
    """Count the clusters that tie for the largest size.

    ``cosine_distance`` is resized to a square ``(n, n)`` array with
    ``n = len(cosine_distance)`` and clustered with affinity propagation as a
    precomputed affinity. The return value is the number of clusters whose
    member count equals that of the most common cluster (0 when there are no
    labels at all).
    """
    size = len(cosine_distance)
    cos_dist = np.resize(cosine_distance, new_shape = (size, size))
    ap = AP(affinity = 'precomputed').fit(cos_dist)
    counter = Counter(ap.labels_).most_common()
    if not counter:
        return 0
    largest = counter[0][1]
    # plain iteration replaces the Python-2-only xrange index loop
    return sum(1 for _, count in counter if count == largest)
document_clustering.py 文件源码 项目:text-analytics-with-python 作者: dipanjanS 项目源码 文件源码 阅读 20 收藏 0 点赞 0 评论 0
def affinity_propagation(feature_matrix):
    """Cluster the rows of ``feature_matrix`` with affinity propagation.

    The input fed to the estimator is the dense form of
    ``feature_matrix * feature_matrix.T``.

    Returns the fitted estimator and its ``labels_``.
    """
    similarity = (feature_matrix * feature_matrix.T).todense()
    model = AffinityPropagation()
    model.fit(similarity)
    return model, model.labels_

# get clusters using affinity propagation
keyterm_clustering.py 文件源码 项目:contextual-advertising-deploy 作者: andreicnica 项目源码 文件源码 阅读 21 收藏 0 点赞 0 评论 0
def cluster_keyterms(keyterms, word2vec_model):
    '''
    Filter ``keyterms`` down to those usable with the embedding model and
    cluster them with affinity propagation on their pairwise similarity.

    :param
    keyterms : list of keyterms in dictionary format. They contain the following details: lemma_string, pos, len,
    cvalue, words, tf, lemma_list
    word2vec_model : embedding model; its ``vocab`` is used for filtering and
    its ``n_similarity`` for the pairwise similarities
    :return:
    list with one dict per cluster, holding "idx" (cluster number), "center"
    (the exemplar keyterm), "members" (keyterms labelled with that cluster)
    and "len" (number of members). An empty list when no keyterm survives
    the filtering.
    '''
    from sklearn import cluster

    #filter keyterms to work with the embedding model
    filtered_keyterms = filter_keyterms_byVocab(keyterms, word2vec_model.vocab)
    if not filtered_keyterms:
        # nothing to cluster; avoids fitting on an empty similarity matrix
        return []

    X = [[word2vec_model.n_similarity(kt1, kt2) for kt2 in filtered_keyterms]
         for kt1 in filtered_keyterms]

    # every sample gets the median similarity as its preference
    preference = [np.median(X)] * len(filtered_keyterms)

    # single-argument print(...) behaves the same on Python 2 and Python 3
    print("Start Affinity Propagation ...")
    af = cluster.AffinityPropagation(affinity="precomputed", damping=0.5, preference = preference)
    af.fit(X)
    print("Finished affinity propagation")

    af_labels = af.labels_

    clusters = []
    for i, center_idx in enumerate(af.cluster_centers_indices_):
        ## compute cluster composition
        cluster_members = [kt for kt, label in zip(filtered_keyterms, af_labels)
                           if label == i]

        clusters.append({
            "idx" : i,
            "center": filtered_keyterms[center_idx],
            "members": cluster_members,
            "len": len(cluster_members)
        })

    return clusters


# if __name__ == "__main__":
#     process_keyterm_clusters(GENERATION_NT_CANDIDATES)
cluster.py 文件源码 项目:fitr 作者: abrahamnunes 项目源码 文件源码 阅读 27 收藏 0 点赞 0 评论 0
def __init__(self, damping=0.5, max_iter=200, convergence_iter=15, copy=True, preference=None, affinity='precomputed', verbose=False):
        """Create the wrapped ``AP`` estimator from the given options.

        All arguments are forwarded by keyword and the resulting estimator is
        stored as ``self.algorithm``.
        """
        options = {
            'damping': damping,
            'max_iter': max_iter,
            'convergence_iter': convergence_iter,
            'copy': copy,
            'preference': preference,
            'affinity': affinity,
            'verbose': verbose,
        }
        self.algorithm = AP(**options)
stratified_rand.py 文件源码 项目:sampling 作者: e-baumer 项目源码 文件源码 阅读 24 收藏 0 点赞 0 评论 0
def create_stratum(self, column_names, **kwargs):
        '''
        Use affinity propagation to find the strata of each column.

        column_names is a list of the covariates to be split into strata and
        used for classification. For each column a new column named
        <column_name>_strata is added to self.data giving the stratum of
        every row:

        * columns with at most two distinct values are copied as integers;
        * all other columns are clustered with affinity propagation
          (damping 0.9) and the cluster labels are used as strata.

        **kwargs is accepted but not used.

        Returns self.data (the whole data frame).

        Raises ValueError if a column contains NaN values.
        '''

        for colname in column_names:
            # np.asarray works for pandas Series (which has no reshape
            # method in current pandas) and for plain arrays alike
            values = np.asarray(self.data[colname])

            if np.isnan(values).any():
                raise ValueError("There are NaN values in self.data[%s] that "
                                 "the clustering algorithm can't handle"
                                 % colname)

            string_name = colname+'_strata'

            if np.unique(values).shape[0] <= 2:
                self.data[string_name] = self.data[colname].astype(int)
            else:
                # the estimator expects a 2-D (n_samples, 1) input
                af_model = AP(damping = 0.9)
                strata_groups = af_model.fit(values.reshape(-1, 1))
                self.data[string_name] = strata_groups.labels_

        return self.data


    #In the main function, you need to call create_stratum before create_unique_strata
clusters.py 文件源码 项目:extract 作者: dblalock 项目源码 文件源码 阅读 40 收藏 0 点赞 0 评论 0
def makeAffinityProp(X=None, k=-1):
    """Return an AffinityPropagation estimator with damping 0.9 and preference -200.

    ``X`` and ``k`` are accepted but not used.
    """
    settings = {'damping': .9, 'preference': -200}
    return cluster.AffinityPropagation(**settings)
clusters.py 文件源码 项目:extract 作者: dblalock 项目源码 文件源码 阅读 35 收藏 0 点赞 0 评论 0
def makeClusterers(X, k=2):
    """Build the list of ``(name, clusterer)`` pairs to compare.

    Each clusterer comes from the matching ``make*`` factory in this module;
    ``X`` and ``k`` are passed on to the factories that take them.
    """
    names = (
        'MiniBatchKMeans',
        'AffinityPropagation',
        'MeanShift',
        'SpectralClustering',
        'Ward',
        'AgglomerativeAvg',
        'AgglomerativeMax',
        'AgglomerativeWard',
        'DBSCAN',
    )
    # factories are called in the same order as the names above
    models = (
        makeKMeans(X, k),
        makeAffinityProp(),
        makeMeanShift(X),
        makeSpectral(X, k),
        makeWard(X, k),
        makeAvgLinkage(X, k),
        makeMaxLinkage(X, k),
        makeWardLinkage(X, k),
        makeDBScan(),
    )
    return list(zip(names, models))
utils.py 文件源码 项目:errorgeopy 作者: alpha-beta-soup 项目源码 文件源码 阅读 44 收藏 0 点赞 0 评论 0
def affinity_propagation(location, location_callback):
    """Group the points of ``location`` into clusters with affinity propagation.

    Clusters are returned in cluster-id order; clusters without any member
    are skipped.
    NOTE(review): an earlier docstring promised "largest cluster first", but
    no sorting happens in this function -- confirm which is intended.

    Returns:
        None when there are no points or any coordinate is NaN or infinite.
        Otherwise a list of named tuples (built with cluster_named_tuple)
        carrying ``label`` (cluster id), ``centroid`` (a Point made from the
        cluster centre) and ``location`` (the result of
        ``location_callback`` applied to the member locations).
    """
    pts = location._tuple_points()
    if not pts:
        return None
    X = np.array(pts).reshape((len(pts), len(pts[0])))
    # isfinite is False for NaN as well as for +/-inf
    if not np.all(np.isfinite(X)):
        return None
    X = Imputer().fit_transform(X).astype(np.float32)
    af = AffinityPropagation(
        damping=0.5,
        convergence_iter=15,
        max_iter=200,
        copy=True,
        preference=None,
        affinity='euclidean',
        verbose=False,
    ).fit(X)
    clusters = []
    for cluster_id, cluster_centre in enumerate(af.cluster_centers_):
        members = [location.locations[j]
                   for j, label in enumerate(af.labels_)
                   if label == cluster_id]
        if not members:
            continue
        make_cluster = cluster_named_tuple()
        clusters.append(make_cluster(label=cluster_id,
                                     centroid=Point(cluster_centre),
                                     location=location_callback(members)))
    return clusters
SoundSimilarity.py 文件源码 项目:apicultor 作者: sonidosmutantes 项目源码 文件源码 阅读 17 收藏 0 点赞 0 评论 0
def plot_similarity_clusters(desc1, desc2, plot = None):
    """
    find similar sounds using Affinity Propagation clusters

    The two descriptors are stacked into a two-column matrix, projected with
    a whitened 2-component PCA and clustered with euclidean affinity
    propagation. When ``plot`` equals True the labels are printed next to
    the module-level ``files`` and clusters 0-3 are drawn as a scatter plot
    (blue, red, yellow, green); this path also sleeps for several seconds.
    NOTE(review): ``files`` is not defined in this function; it has to exist
    at module level for the plotting path to work.

    :param desc1: first descriptor values
    :param desc2: second descriptor values
    :param plot: pass True to print the labels and show the plot
    :returns:
      - euclidean_labels: labels of clusters
    """

    # same acceptance rule as before: only True (or a value equal to it)
    show_output = plot == True  # noqa: E712

    if show_output:
        print (Fore.MAGENTA + "Clustering")

    # with_mean=False and with_std=False: scale() neither centres nor rescales here
    min_max = preprocessing.scale(np.vstack((desc1,desc2)).T, with_mean=False, with_std=False)
    pca = PCA(n_components=2, whiten=True)
    y = pca.fit(min_max).transform(min_max)

    euclidean = AffinityPropagation(convergence_iter=1800, affinity='euclidean')
    euclidean_labels= euclidean.fit_predict(y)

    if show_output:

        time.sleep(5)

        print (Fore.WHITE + "Cada número representa el grupo al que pertence el sonido como ejemplar de otro/s. El grupo '0' esta coloreado en azul, el grupo '1' esta coloreado en rojo, el grupo '2' esta coloreado en amarillo. Observa el ploteo para ver qué sonidos son ejemplares de otros")
        # call form of print works on Python 2 and Python 3
        print(np.vstack((euclidean_labels,files)).T)

        time.sleep(6)

        # clusters 0..3 are drawn in blue, red, yellow and green
        for label, colour in enumerate('bryg'):
            members = euclidean_labels == label
            plt.scatter(y[members,0], y[members,1], c=colour)
        plt.show()

    return euclidean_labels


# save clusters files in clusters directory
AffinityPropagation.py 文件源码 项目:ProjectOfDataMining 作者: IljaNovo 项目源码 文件源码 阅读 23 收藏 0 点赞 0 评论 0
def compute_affinity_propagation(preference_, X):
    """Run affinity propagation on ``X``, print quality metrics and plot the clusters.

    Parameters
    ----------
    preference_ : value passed as ``preference`` to AffinityPropagation.
    X : data to cluster, indexed as a 2-D array for plotting. When None,
        300 samples are generated with make_blobs around three fixed centres.

    The printed metrics compare the found labels with ``labels_true`` taken
    from make_blobs. Nothing is returned; a matplotlib figure is shown.
    """
    centers = [[1, 1], [-1, -1], [1, -1]]
    n_samples = 300
    # ``is None`` instead of ``== None``: comparing an ndarray with ``==``
    # is elementwise and cannot be used as a condition
    if X is None:
        X, labels_true = make_blobs(n_samples = n_samples, centers=centers, cluster_std=1, random_state=0)
        print("Data is none!!!")
        print("Generating " + str(n_samples) + " samples")
    else:
        # NOTE(review): these reference labels come from synthetic blobs of
        # the same length, not from X itself -- confirm that the metrics
        # below are meaningful for caller-supplied data.
        _, labels_true = make_blobs(n_samples=len(X), centers=centers, cluster_std=1, random_state=0)

    af = AffinityPropagation(preference=preference_).fit(X)
    cluster_centers_indices = af.cluster_centers_indices_
    labels = af.labels_
    n_clusters_ = len(cluster_centers_indices)
    print('Estimated number of clusters: %d' % n_clusters_)
    print("Homogeneity: %0.3f" % metrics.homogeneity_score(labels_true, labels))
    print("Completeness: %0.3f" % metrics.completeness_score(labels_true, labels))
    print("V-measure: %0.3f" % metrics.v_measure_score(labels_true, labels))
    print("Adjusted Rand Index: %0.3f" % metrics.adjusted_rand_score(labels_true, labels))
    print("Adjusted Mutual Information: %0.3f" % metrics.adjusted_mutual_info_score(labels_true, labels))
#    print("Silhouette Coefficient: %0.3f" % metrics.silhouette_score(X, labels, metric='sqeuclidean'))
    print("Fowlkes Mallows Score: %0.3f" % metrics.fowlkes_mallows_score(labels_true, labels))

    plt.close('all')
    plt.figure(1)
    plt.clf()
    colors = cycle('bgrcmykbgrcmykbgrcmykbgrcmyk')
    for k, col in zip(range(n_clusters_), colors):
        class_members = labels == k
        cluster_center = X[cluster_centers_indices[k]]
        # members as dots, the exemplar as a large marker, plus a line from
        # the exemplar to every member
        plt.plot(X[class_members, 0], X[class_members, 1], col + '.')
        plt.plot(cluster_center[0], cluster_center[1], 'o', markerfacecolor=col, markeredgecolor='k', markersize=14)
        for x in X[class_members]:
            plt.plot([cluster_center[0], x[0]], [cluster_center[1], x[1]], col)

    plt.title('Estimated number of clusters: %d' % n_clusters_)
    plt.show()
song2vec_operator.py 文件源码 项目:MusicTaster 作者: JayveeHe 项目源码 文件源码 阅读 23 收藏 0 点赞 0 评论 0
def cluster_song_in_playlist(self, playlist_id, cluster_n=5, is_detailed=False):
        """
        Cluster the songs of a playlist by their song2vec vectors.

        Args:
            playlist_id: id of the playlist to fetch with playlist_detail
            cluster_n: accepted but not used by this method
            is_detailed: accepted but not used by this method

        Returns:
            (clusters, playlist name, song info dict). ``clusters`` is a list
            of lists of lower-cased song names; when the playlist has at most
            one distinct song it is a single list holding all of them. The
            info dict maps each song name to its name, first artist, id,
            album image url and site url.
        """
        playlist_obj = playlist_detail(playlist_id)
        song_list = []
        vec_list = []
        song_info_dict = {}
        ap_cluster = AffinityPropagation()
        data_process_logger.info('clustering playlist: %s' % playlist_obj['name'])
        for item in playlist_obj['tracks']:
            song = item['name'].lower()
            song_info_dict[song] = {
                'name': song,
                'artist': item['artists'][0]['name'],
                'id': item['id'],
                'album_img_url': item['album']['picUrl'],
                'site_url': 'http://music.163.com/#/song?id=%s' % item['id']
            }
            if song in song_list:
                continue
            song_list.append(song)
            model = self.song2vec_model
            if model.vocab.get(song) and len(model.syn0norm):
                song_vec = model.syn0norm[model.vocab[song].index]
            else:
                # songs unknown to the model get an all-zero vector
                data_process_logger.warn(
                    'The song %s of playlist-%s is not in dataset' % (song, playlist_obj['name']))
                song_vec = [0 for _ in range(model.vector_size)]
            vec_list.append(song_vec)

        if len(vec_list) <= 1:
            return [song_list], playlist_obj['name'], song_info_dict

        cluster_result = ap_cluster.fit(vec_list, song_list)
        cluster_array = [[] for _ in cluster_result.cluster_centers_indices_]
        for song_name, label in zip(song_list, cluster_result.labels_):
            cluster_array[label].append(song_name)
        return cluster_array, playlist_obj['name'], song_info_dict
song2vec_operator.py 文件源码 项目:MusicTaster 作者: JayveeHe 项目源码 文件源码 阅读 22 收藏 0 点赞 0 评论 0
def cluster_artist_in_playlist(self, playlist_id, cluster_n=5, is_detailed=False):
        """
        Cluster the artists of a playlist by their artist2vec vectors.

        Args:
            playlist_id: id of the playlist to fetch with playlist_detail
            cluster_n: accepted but not used by this method
            is_detailed: accepted but not used by this method

        Returns:
            (clusters, playlist name, {}). ``clusters`` is a list of lists of
            lower-cased artist names (first artist of each track); when the
            playlist has at most one distinct artist it is a single list
            holding all of them.
        """
        playlist_obj = playlist_detail(playlist_id)
        artist_list = []
        vec_list = []
        ap_cluster = AffinityPropagation()
        data_process_logger.info('clustering playlist: %s' % playlist_obj['name'])
        for item in playlist_obj['tracks']:
            artist = item['artists'][0]['name'].lower()
            if artist in artist_list:
                continue
            artist_list.append(artist)
            model = self.artist2vec_model
            if model.vocab.get(artist) and len(model.syn0norm):
                artist_vec = model.syn0norm[model.vocab[artist].index]
            else:
                # artists unknown to the model get an all-zero vector
                data_process_logger.warn(
                    'The artist %s of playlist-%s is not in dataset' % (artist, playlist_obj['name']))
                artist_vec = [0 for _ in range(model.vector_size)]
            vec_list.append(artist_vec)

        if len(vec_list) <= 1:
            return [artist_list], playlist_obj['name'], {}

        cluster_result = ap_cluster.fit(vec_list, artist_list)
        cluster_array = [[] for _ in cluster_result.cluster_centers_indices_]
        for artist_name, label in zip(artist_list, cluster_result.labels_):
            cluster_array[label].append(artist_name)
        return cluster_array, playlist_obj['name'], {}


问题


面经


文章

微信
公众号

扫码关注公众号