python类TSNE的实例源码

recipe_clustering.py 文件源码 项目:Flavor-Network 作者: lingcheng99 项目源码 文件源码 阅读 25 收藏 0 点赞 0 评论 0
def tsne_cluster_cuisine(df,sublist):
    """Scatter-plot a 2-D t-SNE embedding of recipes, one colour per cuisine.

    Rows of ``df`` whose ``'cuisine'`` equals an entry of ``sublist`` are
    stacked in ``sublist`` order.  The ``'cuisine'`` and ``'recipeName'``
    columns are dropped and the remaining columns are embedded with t-SNE on
    precomputed pairwise cosine distances.  Each cuisine is drawn as its own
    scatter series on a new 10x10 figure with a legend.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``'cuisine'`` and ``'recipeName'`` columns; all other
        columns are used as features.
    sublist : sequence
        Cuisine values to plot, in plotting order.  Must be non-empty.
    """
    # lenlist holds cumulative row counts: rows lenlist[i]:lenlist[i+1] of the
    # stacked frame belong to sublist[i].
    lenlist=[0]
    df_sub = df[df['cuisine']==sublist[0]]
    lenlist.append(df_sub.shape[0])
    for cuisine in sublist[1:]:
        temp = df[df['cuisine']==cuisine]
        df_sub = pd.concat([df_sub, temp],axis=0,ignore_index=True)
        lenlist.append(df_sub.shape[0])
    df_X = df_sub.drop(['cuisine','recipeName'],axis=1)
    # A single pre-formatted argument prints the same text on Python 2 and 3.
    print("{} {}".format(df_X.shape, lenlist))

    dist = squareform(pdist(df_X, metric='cosine'))
    # init='random' is spelled out because PCA initialisation (the default in
    # newer scikit-learn releases) is rejected for a precomputed metric.
    tsne = TSNE(metric='precomputed', init='random').fit_transform(dist)

    palette = sns.color_palette("hls", len(sublist))
    plt.figure(figsize=(10,10))
    for i,cuisine in enumerate(sublist):
        start, stop = lenlist[i], lenlist[i+1]
        plt.scatter(tsne[start:stop,0], tsne[start:stop,1],
                    c=palette[i], label=cuisine)
    plt.legend()

#interactive plot with bokeh; set up for four categories, with color palette; pass in df for either ingredient or flavor
visualize.py 文件源码 项目:KATE 作者: hugochan 项目源码 文件源码 阅读 27 收藏 0 点赞 0 评论 0
def word_cloud(word_embedding_matrix, vocab, s, save_file='scatter.png'):
    """Show a labelled 2-D t-SNE scatter of selected rows of an embedding.

    The whole ``word_embedding_matrix`` is embedded with t-SNE; only the rows
    ``vocab[w]`` for each ``w`` in ``s`` are plotted, each annotated with its
    entry of ``s``.  The figure is displayed with ``plt.show()``.

    ``save_file`` is currently unused (saving is disabled in favour of
    showing the figure).
    """
    # Resolve the selected entries to row indices before the expensive fit.
    row_ids = [vocab[entry] for entry in s]

    # Note that the following line might use a good chunk of RAM
    embedded = TSNE(n_components=2, random_state=0).fit_transform(word_embedding_matrix)
    selected = embedded[np.array(row_ids)]
    xs = selected[:, 0]
    ys = selected[:, 1]

    plt.subplots_adjust(bottom = 0.1)
    plt.scatter(xs, ys, marker='o', cmap=plt.get_cmap('Spectral'))

    arrow_style = dict(arrowstyle = '<-', connectionstyle='arc3,rad=0')
    for label, x, y in zip(s, xs, ys):
        plt.annotate(
            label,
            xy=(x, y),
            xytext=(-20, 20),
            textcoords='offset points',
            ha='right',
            va='bottom',
            fontsize=20,
            arrowprops=arrow_style,
        )
    plt.show()
    # plt.savefig(save_file)
vae_plots.py 文件源码 项目:pyro 作者: uber 项目源码 文件源码 阅读 28 收藏 0 点赞 0 评论 0
def plot_tsne(z_mu, classes, name):
    """Save t-SNE scatter plots of latent means, adding one class at a time.

    ``z_mu`` and ``classes`` are converted to NumPy arrays via
    ``.data.cpu().numpy()``.  ``z_mu`` is embedded in 2-D with t-SNE.  For
    each class index 0..9 the points whose ``classes[:, ic] == 1`` are added
    to a single shared figure, which is saved after every class as
    ``./vae_results/<name>_embedding_<ic>.png`` and once more at the end as
    ``./vae_results/<name>_embedding.png``.

    NOTE(review): ``classes`` is presumably a one-hot matrix with at least 10
    columns -- confirm against the caller.
    """
    import matplotlib
    matplotlib.use('Agg')
    import matplotlib.pyplot as plt
    from sklearn.manifold import TSNE
    model_tsne = TSNE(n_components=2, random_state=0)
    z_states = z_mu.data.cpu().numpy()
    z_embed = model_tsne.fit_transform(z_states)
    classes = classes.data.cpu().numpy()
    fig666 = plt.figure()
    # The title does not depend on the class, so it is set once up front.
    plt.title("Latent Variable T-SNE per Class")
    for ic in range(10):
        ind_class = classes[:, ic] == 1
        color = plt.cm.Set1(ic)
        plt.scatter(z_embed[ind_class, 0], z_embed[ind_class, 1], s=10, color=color)
        # Saved inside the loop on purpose: each file shows classes 0..ic.
        fig666.savefig('./vae_results/'+str(name)+'_embedding_'+str(ic)+'.png')
    fig666.savefig('./vae_results/'+str(name)+'_embedding.png')
common.py 文件源码 项目:singlecell-dash 作者: czbiohub 项目源码 文件源码 阅读 22 收藏 0 点赞 0 评论 0
def compute_bulk_smushing(self):
        """Embed the per-plate median signal ('bulk') in 2-D, with a CSV cache.

        Cells are grouped by the ``SAMPLE_MAPPING`` column of the cell
        metadata.  If the cache file exists and its index holds exactly the
        current set of groups, the cached frame is returned.  Otherwise the
        group medians are embedded with t-SNE (cosine metric), written to
        the cache file and returned.
        """
        by_plate = self.genes.groupby(self.cell_metadata[self.SAMPLE_MAPPING])

        if os.path.exists(self.bulk_smushed_cache_file):
            cached = pd.read_csv(self.bulk_smushed_cache_file, names=[0, 1],
                                 header=0, index_col=0)
            # The cache is only valid while the set of plates is unchanged.
            if set(by_plate.groups) == set(cached.index):
                return cached

        # Cache missing or stale: recompute the projection from scratch.
        plate_medians = by_plate.median()
        embedder = TSNE(random_state=0, perplexity=10, metric='cosine')
        coords = embedder.fit_transform(plate_medians)
        projection = pd.DataFrame(coords, index=plate_medians.index)

        projection.to_csv(self.bulk_smushed_cache_file)
        return projection
common.py 文件源码 项目:singlecell-dash 作者: czbiohub 项目源码 文件源码 阅读 24 收藏 0 点赞 0 评论 0
def compute_cell_smushing(self):
        """Embed the cells of every plate in 2-D, with a pickle cache.

        Returns a dict mapping each plate name to a DataFrame of t-SNE
        coordinates indexed like that plate's cells.  Plates already present
        in the cache file are reused; only missing plates are computed, after
        which the whole dict is written back to the cache file.
        """
        by_plate = self.genes.groupby(self.cell_metadata[self.SAMPLE_MAPPING])

        embeddings = {}
        if os.path.exists(self.cell_smushed_cache_file):
            embeddings = pd.read_pickle(self.cell_smushed_cache_file)
            # Every current plate is already cached: nothing to compute.
            if set(by_plate.groups) <= set(embeddings):
                return embeddings

        for plate_name, plate_genes in by_plate:
            if plate_name in embeddings:
                continue
            embedder = TSNE(metric='cosine', random_state=0)
            coords = embedder.fit_transform(plate_genes)
            embeddings[plate_name] = pd.DataFrame(coords, index=plate_genes.index)

        pd.to_pickle(embeddings, self.cell_smushed_cache_file)
        return embeddings
model.py 文件源码 项目:tensorflow_tutorial 作者: lpty 项目源码 文件源码 阅读 27 收藏 0 点赞 0 评论 0
def gen(self):
        """Plot a 2-D t-SNE projection of the first ``viz_words`` embeddings.

        The embedding tensor is evaluated in a fresh session after restoring
        the latest checkpoint found in the current directory.  Each plotted
        point is annotated with ``self.train_text.int_to_vocab[idx]``.
        """
        embedding_op, _ = self.embedding()
        saver = tf.train.Saver()
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            saver.restore(sess, tf.train.latest_checkpoint('.'))
            vectors = sess.run(embedding_op)

        # Keep only the rows that will be visualised.
        subset = vectors[:self.viz_words, :]

        # Reduce to two dimensions.
        reducer = TSNE(n_components=2, init='pca', random_state=0)
        points = reducer.fit_transform(subset)

        # Draw one labelled point per word.
        plt.subplots(figsize=(10, 10))
        for idx in range(self.viz_words):
            px, py = points[idx, :]
            plt.scatter(px, py, color='steelblue')
            plt.annotate(self.train_text.int_to_vocab[idx], (px, py), alpha=0.7)
        plt.show()
visualize_space.py 文件源码 项目:Msc_Multi_label_ZeroShot 作者: thomasSve 项目源码 文件源码 阅读 33 收藏 0 点赞 0 评论 0
def main():
    """Show a labelled 2-D t-SNE scatter of the word vectors of a label space.

    Reads the command-line arguments with ``parse_args()``, loads the
    language model and image database they name, looks up a word vector for
    every label of ``args.space`` and plots the t-SNE embedding with each
    point annotated by its label.
    """
    args = parse_args()

    print('Called with args:')
    print(args)
    lang_db = get_language_model(args.lang_name)
    imdb = get_imdb(args.imdb_name)

    # Get words in space
    vocabulary = imdb.get_labels(args.space)

    # Get features for words
    wv = [lang_db.word_vector(w) for w in vocabulary]

    tsne = TSNE(n_components=2, random_state=0)
    # Changes NumPy's process-wide print options (no scientific notation).
    np.set_printoptions(suppress=True)
    Y = tsne.fit_transform(wv)

    plt.scatter(Y[:, 0], Y[:, 1])
    for label, x, y in zip(vocabulary, Y[:, 0], Y[:, 1]):
        plt.annotate(label, xy=(x, y), xytext=(0, 0), textcoords='offset points')
    plt.show()
visualize_countries.py 文件源码 项目:lazyprogrammer 作者: inhwane 项目源码 文件源码 阅读 24 收藏 0 点赞 0 评论 0
def main(we_file='glove_model_50.npz', w2i_file='glove_word2idx_50.json'):
    """Show a t-SNE scatter of a fixed list of country/nationality words.

    Parameters
    ----------
    we_file : str
        ``.npz`` archive whose ``arr_0`` and ``arr_1`` arrays are averaged
        as ``(arr_0 + arr_1.T) / 2`` to form the word-embedding matrix.
    w2i_file : str
        JSON file mapping each word to its row index in that matrix.

    Raises
    ------
    KeyError
        If one of the hard-coded words is missing from the index mapping.
    """
    words = ['japan', 'japanese', 'england', 'english', 'australia', 'australian', 'china', 'chinese', 'italy', 'italian', 'french', 'france', 'spain', 'spanish']

    with open(w2i_file) as f:
        word2idx = json.load(f)

    npz = np.load(we_file)
    W = npz['arr_0']
    V = npz['arr_1']
    We = (W + V.T) / 2

    idx = [word2idx[w] for w in words]

    # t-SNE is fit on the full vocabulary; the chosen words are selected
    # only afterwards, so row i of Z corresponds to words[i].
    tsne = TSNE()
    Z = tsne.fit_transform(We)
    Z = Z[idx]
    plt.scatter(Z[:,0], Z[:,1])
    # The label is passed positionally because the keyword name of the first
    # annotate() argument differs between matplotlib releases.
    for word, (x, y) in zip(words, Z):
        plt.annotate(word, xy=(x, y))
    plt.show()
visualize.py 文件源码 项目:auDeep 作者: auDeep 项目源码 文件源码 阅读 26 收藏 0 点赞 0 评论 0
def take_action(self, parsed_args):
        """Load a data set, embed its features with t-SNE and plot the result.

        Features are flattened to one row per instance.  When a row has more
        than 50 values, PCA is applied first, keeping up to 200 components.
        The embedding is passed to ``self.plot_with_labels``.

        Raises
        ------
        IOError
            If ``parsed_args.input`` does not exist.
        """
        if not parsed_args.input.exists():
            raise IOError("failed to open data set at {}".format(parsed_args.input))

        data_set = load(parsed_args.input)

        features = np.reshape(data_set.features, [data_set.num_instances, -1])

        if features.shape[1] > 50:
            self.log.info("applying PCA")

            # PCA cannot extract more components than min(n_samples,
            # n_features); clamp so smaller data sets do not fail here.
            n_components = min(200, features.shape[0], features.shape[1])
            pca = PCA(n_components=n_components)
            pca.fit(features)
            features = pca.transform(features)

        self.log.info("computing T-SNE embedding")
        tsne = TSNE(perplexity=parsed_args.perplexity,
                    learning_rate=parsed_args.learning_rate,
                    verbose=self.app_args.verbose_level)

        embedding = tsne.fit_transform(features)

        self.log.info("plotting embedding")
        self.plot_with_labels(data_set, embedding)
def _plot_proto_symbol_space(coordinates, target_names, name, args):
    """Plot a 2-D t-SNE projection of ``coordinates``, one marker per sample.

    Every sample gets its own rainbow colour, an annotation and a legend
    entry taken from ``target_names``.  The figure is saved as
    ``<args.output_dir>/<name>.pdf`` when ``args.output_dir`` is not
    ``None``; otherwise it is shown interactively.
    """
    # Reduce to 2D so that we can plot it.
    coordinates_2d = TSNE().fit_transform(coordinates)

    n_samples = coordinates_2d.shape[0]
    x = coordinates_2d[:, 0]
    y = coordinates_2d[:, 1]
    colors = cm.rainbow(np.linspace(0, 1, n_samples))

    fig = plt.figure(1)
    plt.clf()
    ax = fig.add_subplot(111)
    dots = []
    for idx in range(n_samples):
        dots.append(ax.plot(x[idx], y[idx], "o", c=colors[idx], markersize=15)[0])
        ax.annotate(target_names[idx],  xy=(x[idx], y[idx]))
    # The legend sits below the axes, so it must be passed to savefig as an
    # extra artist or the tight bounding box would crop it.
    lgd = ax.legend(dots, target_names, ncol=4, numpoints=1, loc='upper center', bbox_to_anchor=(0.5,-0.1))
    ax.grid(True)

    if args.output_dir is not None:
        path = os.path.join(args.output_dir, name + '.pdf')
        fig.savefig(path, bbox_extra_artists=(lgd,), bbox_inches='tight')
        # Report success only after the file has actually been written.
        print('Saved plot to file "%s"' % path)
    else:
        plt.show()
sample_tsne.py 文件源码 项目:gan-error-avoidance 作者: aleju 项目源码 文件源码 阅读 20 收藏 0 点赞 0 评论 0
def embed_or_load_cache(codes, gen, r_idx, batch_size, save_path):
    """Return the 2-D t-SNE embedding for ``r_idx``, using a CSV cache.

    The cache lives at ``<save_path>/tsne_plots/embedded_points_rNN.csv``
    (one ``#x,y`` header line followed by ``x,y`` rows).  If it exists, it
    is parsed and returned as a ``float32`` array.  Otherwise the codes are
    produced with ``generate_codes_by_r``, embedded with t-SNE, written to
    the cache (creating the directory if needed) and returned.
    """
    cache_dir = os.path.join(save_path, 'tsne_plots')
    cache_fp = os.path.join(cache_dir, 'embedded_points_r%02d.csv' % (r_idx,))
    if os.path.isfile(cache_fp):
        with open(cache_fp) as f:
            lines = f.readlines()
        # lines[0] is the "#x,y" header written below.
        rows = [line.strip().split(",") for line in lines[1:]]
        vals = [(float(x), float(y)) for (x, y) in rows]
        return np.array(vals, dtype=np.float32)

    codes_r = generate_codes_by_r(gen, codes, r_idx, batch_size)

    print(codes_r.shape)
    print("Embedding %s via TSNE..." % (str(codes_r.shape),))
    tsne = TSNE(perplexity=40, n_iter=10000, learning_rate=4000, verbose=True)
    codes_r_2d = tsne.fit_transform(codes_r.astype(np.float64))
    print("shape after embedding: %s" % (str(codes_r_2d.shape),))

    # Do not lose a long t-SNE run just because the cache directory is absent.
    if not os.path.isdir(cache_dir):
        os.makedirs(cache_dir)
    with open(cache_fp, "w") as f:
        f.write("#x,y\n")
        for i in range(codes_r.shape[0]):
            f.write("%.6f,%.6f\n" % (codes_r_2d[i, 0], codes_r_2d[i, 1]))
    return codes_r_2d
vgg16svm.py 文件源码 项目:cancer_nn 作者: tanmoyopenroot 项目源码 文件源码 阅读 22 收藏 0 点赞 0 评论 0
def plotInputData(X, Y, title, data_len):
    """Show a 2-D t-SNE scatter of ``X`` split into two halves.

    After the t-SNE reduction, the first ``data_len // 2`` rows are drawn
    as green circles and the remaining rows as blue triangles; the legend
    labels them "Melanoma" and "Benign" in that order.  Coordinates are
    scaled by 10 for display.

    ``Y`` is accepted but not used by the plot.
    """
    time_start = time.time()
    X = TSNE(n_components=2, verbose=1, perplexity=40, n_iter=300).fit_transform(X)
    print("After Reduction Data Shape : {0}".format(X.shape))
    print('t-SNE done! Time elapsed: {} seconds'.format(time.time()-time_start))

    # Floor division keeps the split point an integer on Python 3 as well.
    half = data_len // 2

    # Main scatter plot and plot annotation
    f, ax = plt.subplots(figsize=(7, 7))
    ax.scatter(X[:half, 0] * 10, X[:half, 1] * 10, marker = 'o', color = 'green', s=30, alpha=0.5)
    ax.scatter(X[half:, 0] * 10, X[half:, 1] * 10, marker = '^', color = 'blue', s=30, alpha=0.5)
    plt.legend(["Melanoma", "Benign"], loc='upper right')
    plt.title(title)
    plt.ylabel('Y')
    plt.xlabel('X')

    plt.show()
visualization.py 文件源码 项目:DLDisambiguation 作者: Labyrinth108 项目源码 文件源码 阅读 36 收藏 0 点赞 0 评论 0
def main():
    """Run ``checkSimilarity`` on the word-level word2vec model.

    NOTE(review): the query literal "?" looks like a non-ASCII character
    that was lost to an encoding error -- confirm the intended query term.
    """
    # Alternative model (character level), currently disabled:
    # model_file = "../data/word2vec/character.model"
    model_file = "../data/word2vec_new/word.model"
    checkSimilarity(model_file, "?")

    # Disabled: t-SNE scatter plot of the first 1000 loaded embeddings,
    # annotated with their vocabulary entries.
    # character_wv_file = '../data/word2vec/character_model.txt'
    # word_wv_file = '../data/word2vec/word_model.txt'
    #
    # embeddings_file = word_wv_file
    # wv, vocabulary = load_embeddings(embeddings_file)
    #
    # tsne = TSNE(n_components=2, random_state=0)
    # np.set_printoptions(suppress=True)
    # Y = tsne.fit_transform(wv[:1000, :])
    #
    # plt.scatter(Y[:, 0], Y[:, 1])
    # for label, x, y in zip(vocabulary, Y[:, 0], Y[:, 1]):
    #     plt.annotate(label, xy=(x, y), xytext=(0, 0), textcoords='offset points')
    # plt.show()
tsne.py 文件源码 项目:yellowbrick 作者: DistrictDataLabs 项目源码 文件源码 阅读 24 收藏 0 点赞 0 评论 0
def __init__(self, ax=None, decompose='svd', decompose_by=50, classes=None,
               colors=None, colormap=None, **kwargs):
        """
        Set up the TSNE visualizer.

        ``ax`` and any extra keyword arguments are forwarded to the parent
        initializer.  ``classes``, ``colors`` and ``colormap`` are stored on
        the instance, and the transformer is built by ``make_transformer``
        from ``decompose``, ``decompose_by`` and the extra keyword arguments.
        """
        super(TSNEVisualizer, self).__init__(ax=ax, **kwargs)

        # State of the visualizer itself.
        self.classes_, self.n_instances_ = classes, 0

        # Appearance.
        # TODO: Only colors currently works to select the colors of classes.
        self.colors, self.colormap = colors, colormap

        # The transformer that performs the projection.
        transformer = self.make_transformer(decompose, decompose_by, kwargs)
        self.transformer_ = transformer
tsne.py 文件源码 项目:yellowbrick 作者: DistrictDataLabs 项目源码 文件源码 阅读 27 收藏 0 点赞 0 评论 0
def finalize(self, **kwargs):
        """
        Finish the drawing: set the title, strip the axis ticks (they convey
        no information about TSNE) and, when classes are set, place a legend
        to the right of the plot area.
        """
        title = "TSNE Projection of {} Documents".format(self.n_instances_)
        self.set_title(title)

        # Clear the y ticks, then the x ticks.
        for clear_ticks in (self.ax.set_yticks, self.ax.set_xticks):
            clear_ticks([])

        # Without classes there is nothing to put in a legend.
        if not self.classes_:
            return

        # Shrink the axes to 80% of their width so the legend fits outside
        # of the figure box.
        box = self.ax.get_position()
        shrunk = [box.x0, box.y0, box.width * 0.8, box.height]
        self.ax.set_position(shrunk)
        self.ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))
simPlot.py 文件源码 项目:anime_recs 作者: Cpierse 项目源码 文件源码 阅读 24 收藏 0 点赞 0 评论 0
def tSNE_model(Vt,aid_dict):
    """Embed the columns of ``Vt`` with t-SNE and attach anime metadata.

    Parameters
    ----------
    Vt : array
        2-D array; its transpose is embedded, so each column of ``Vt``
        becomes one point.
    aid_dict : mapping
        Maps a column index of ``Vt`` to an anime id.

    Returns
    -------
    pandas.DataFrame
        Columns ``x``, ``y``, ``anime_name``, ``anime_id`` and ``rating``,
        one row per column of ``Vt``.

    Side effects: writes the bare ``x``/``y`` coordinates to
    ``results\\tsne_svd.csv`` (before the metadata columns are added) and
    reads the ``animeData`` table of ``user_anime_data.db``.
    """
    tsne_model = TSNE(n_components=2, verbose=1, random_state=0)
    tsne_V = tsne_model.fit_transform(np.transpose(Vt))
    # Put data in a pandas dataframe:
    tsne_df = pd.DataFrame(tsne_V, columns=['x', 'y'])
    # Save it:
    tsne_df.to_csv('results\\tsne_svd.csv')
    # Get anime names:
    con = sqlite3.connect('user_anime_data.db')
    try:
        cur = con.cursor()
        rows = cur.execute('SELECT Anime, Name, Score FROM animeData').fetchall()
    finally:
        # Release the database handle even if the query fails.
        con.close()
    anime_data = {anime: (anime_name, score) for anime, anime_name, score in rows}
    # Resolve every column index to its anime id once, then reuse it.
    anime_ids = [aid_dict[x] for x in range(Vt.shape[1])]
    anime_names = [anime_data[anime_id][0] for anime_id in anime_ids]
    anime_scores = [anime_data[anime_id][1] for anime_id in anime_ids]
    tsne_df['anime_name'] = anime_names
    tsne_df['anime_id'] = anime_ids
    tsne_df['rating'] = anime_scores
    return tsne_df

# Plotting the data:
visuals.py 文件源码 项目:VariationalAutoEncoder 作者: despoisj 项目源码 文件源码 阅读 25 收藏 0 点赞 0 评论 0
def computeTSNEProjectionOfLatentSpace(X, encoder, display=True):
    """Project the encoder's output for ``X`` to 2-D with t-SNE.

    With ``display=True`` the samples of ``X`` are drawn at their t-SNE
    positions via ``imscatter`` and nothing is returned; with
    ``display=False`` the t-SNE coordinates are returned instead.
    """
    # Latent representation of the inputs
    print("Computing latent space projection...")
    latent = encoder.predict(X)

    # t-SNE embedding of that representation
    print("Computing t-SNE embedding...")
    projector = manifold.TSNE(n_components=2, init='pca', random_state=0)
    embedded = projector.fit_transform(latent)

    if not display:
        return embedded

    # Draw each input at its embedded position
    print("Plotting t-SNE visualization...")
    _, axes = plt.subplots()
    imscatter(embedded[:, 0], embedded[:, 1], imageData=X, ax=axes, zoom=0.15)
    plt.show()

# Show dataset images with T-sne projection of pixel space
visuals.py 文件源码 项目:VariationalAutoEncoder 作者: despoisj 项目源码 文件源码 阅读 22 收藏 0 点赞 0 评论 0
def computeTSNEProjectionOfPixelSpace(X, display=True):
    """Project the raw pixels of ``X`` to 2-D with t-SNE.

    ``X`` is reshaped to rows of ``imageSize*imageSize*3`` values before the
    embedding.  With ``display=True`` the samples are drawn at their t-SNE
    positions via ``imscatter`` and nothing is returned; with
    ``display=False`` the t-SNE coordinates are returned instead.
    """
    # t-SNE embedding of the flattened pixel data
    print("Computing t-SNE embedding...")
    flat_pixels = X.reshape([-1,imageSize*imageSize*3])
    projector = manifold.TSNE(n_components=2, init='pca', random_state=0)
    embedded = projector.fit_transform(flat_pixels)

    if not display:
        return embedded

    # Draw each input at its embedded position
    print("Plotting t-SNE visualization...")
    _, axes = plt.subplots()
    imscatter(embedded[:, 0], embedded[:, 1], imageData=X, ax=axes, zoom=0.15)
    plt.show()

# Reconstructions for samples in dataset
tSNE.py 文件源码 项目:artificio 作者: ankonzoid 项目源码 文件源码 阅读 35 收藏 0 点赞 0 评论 0
def plot_tsne(images, X, filename):
    """Save a t-SNE map of ``X`` with ``images`` drawn at the embedded points.

    ``X`` is embedded in 2-D with t-SNE, rescaled per axis to the range of
    its minimum and maximum, and plotted with one thumbnail from ``images``
    per row.  The figure is written to ``filename``.
    """

    def imscatter(x, y, images, ax=None, zoom=1.0):
        # Place one image per (x, y) pair on the axes and return the artists.
        target = plt.gca() if ax is None else ax
        x, y = np.atleast_1d(x, y)
        placed = []
        for px, py, thumb in zip(x, y, images):
            picture = OffsetImage(thumb, zoom=zoom)
            anchored = AnnotationBbox(picture, (px, py), xycoords='data', frameon=False)
            placed.append(target.add_artist(anchored))
        # Artists added this way do not extend the data limits by themselves.
        target.update_datalim(np.column_stack([x, y]))
        target.autoscale()
        return placed

    def plot_embedding(X, imgs, title=None):
        # Min-max normalise each column, then draw dots and thumbnails.
        lowest = np.min(X, 0)
        highest = np.max(X, 0)
        X = (X - lowest) / (highest - lowest)

        plt.figure()
        ax = plt.subplot(111)
        dot_font = {'weight': 'bold', 'size': 9}
        for row in range(X.shape[0]):
            plt.text(X[row, 0], X[row, 1], ".", fontdict=dot_font)
        if hasattr(offsetbox, 'AnnotationBbox'):
            imscatter(X[:,0], X[:,1], imgs, zoom=0.1, ax=ax)

        plt.xticks([])
        plt.yticks([])
        if title is not None:
            plt.title(title)

    print("Computing t-SNE embedding")
    embedder = manifold.TSNE(n_components=2, init='pca', random_state=0)
    embedded = embedder.fit_transform(X)
    plot_embedding(embedded, images, "t-SNE embedding of images")
    plt.savefig(filename, bbox_inches='tight')

# Driver
corpus.py 文件源码 项目:NNLM 作者: kanoh-k 项目源码 文件源码 阅读 43 收藏 0 点赞 0 评论 0
def plot(self, filename="./corpus/model/blog.png"):
        """Save a labelled t-SNE scatter of the first 500 embeddings.

        The first 500 rows of ``self.final_embeddings`` are embedded in 2-D
        and each point is annotated with the word whose id (according to
        ``self.dictionary``) equals the row number.  The figure is written
        to ``filename``.
        """
        plot_only = 500
        reducer = TSNE(perplexity=30, n_components=2, init="pca", n_iter=5000)
        points = reducer.fit_transform(self.final_embeddings[:plot_only, :])

        # self.dictionary maps word -> id; invert it to look words up by id.
        word_by_id = dict(zip(self.dictionary.values(), self.dictionary.keys()))
        labels = [word_by_id[word_id] for word_id in range(plot_only)]

        annotate_opts = dict(xytext=(5, 2), textcoords="offset points",
                             ha="right", va="bottom")
        plt.figure(figsize=(18, 18))
        for row, label in enumerate(labels):
            x, y = points[row, :]
            plt.scatter(x, y)
            plt.annotate(label, xy=(x, y), **annotate_opts)
        plt.savefig(filename)
        print("Scatter plot was saved to", filename)
word2vec.py 文件源码 项目:Tensorflow-Turitors 作者: Xls1994 项目源码 文件源码 阅读 31 收藏 0 点赞 0 评论 0
def plot_with_labels(low_dim_embs, labels, filename='tsne.png'):
        """Save a scatter plot of 2-D embeddings, one annotated point per label.

        Row ``i`` of ``low_dim_embs`` is plotted and annotated with
        ``labels[i]``.  There must be at least as many rows as labels.
        The figure is written to ``filename``.
        """
        assert low_dim_embs.shape[0] >= len(labels), "More labels than embeddings"

        # Text placement shared by every annotation.
        text_opts = dict(xytext=(5, 2),
                         textcoords='offset points',
                         ha='right',
                         va='bottom')

        plt.figure(figsize=(18, 18))  # in inches
        for row, label in enumerate(labels):
            x, y = low_dim_embs[row, :]
            plt.scatter(x, y)
            plt.annotate(label, xy=(x, y), **text_opts)

        plt.savefig(filename)

# TSNE ????
visualize_embedding.py 文件源码 项目:GEM 作者: palash1992 项目源码 文件源码 阅读 26 收藏 0 点赞 0 评论 0
def plot_embedding2D(node_pos, node_colors=None, di_graph=None):
    """Plot node embeddings in 2-D, optionally on top of a graph's edges.

    Parameters
    ----------
    node_pos : array
        2-D array with one row per node.  If it has more than two columns
        it is first reduced to two with t-SNE.
    node_colors : sequence or array, optional
        Colours passed to the plotting call.  ``None`` or an empty sequence
        means "no colours given".
    di_graph : graph, optional
        When given, the nodes are drawn with networkx at the embedded
        positions; otherwise a plain matplotlib scatter is used.
    """
    node_num, embedding_dimension = node_pos.shape
    if(embedding_dimension > 2):
        print("Embedding dimension greater than 2, use tSNE to reduce it to 2")
        model = TSNE(n_components=2)
        node_pos = model.fit_transform(node_pos)

    if di_graph is None:
        # plot using plt scatter
        plt.scatter(node_pos[:, 0], node_pos[:, 1], c=node_colors)
        return

    # Plain truthiness raises for multi-element NumPy arrays, so decide
    # explicitly whether any colours were supplied.
    if node_colors is None:
        has_colors = False
    elif hasattr(node_colors, '__len__'):
        has_colors = len(node_colors) > 0
    else:
        has_colors = bool(node_colors)

    # plot using networkx with edge structure
    pos = {i: node_pos[i, :] for i in range(node_num)}
    if has_colors:
        nx.draw_networkx_nodes(di_graph, pos,
                               node_color=node_colors,
                               width=0.1, node_size=100,
                               arrows=False, alpha=0.8,
                               font_size=5)
    else:
        nx.draw_networkx(di_graph, pos, node_color=node_colors,
                         width=0.1, node_size=300, arrows=False,
                         alpha=0.8, font_size=12)
tsne.py 文件源码 项目:QScode 作者: PierreHao 项目源码 文件源码 阅读 29 收藏 0 点赞 0 评论 0
def main():
    """Save a t-SNE scatter of image features, one colour per target name.

    Features, target names and per-target row selectors come from
    ``getXY(args.image_dir)``.  Target ``i`` is plotted with legend label
    ``i+1`` and its name is written as line ``i+1`` of ``label.txt``, so
    the text file serves as the key to the legend.  The figure is saved as
    ``10crop1.png`` in the current directory.
    """
    # The context manager closes label.txt even if plotting fails midway.
    with open('label.txt','w') as f:
        X, target_names, y = getXY(args.image_dir)
        # np.asarray replaces np.asfarray, which NumPy 2.0 removed.
        X = np.asarray(X,dtype='float')
        colors = cm.gnuplot2(np.linspace(0, 1, len(target_names)))

        # The features are embedded directly (no PCA pre-reduction).
        tsne = TSNE(n_components=2, init='random', random_state=0)
        X_r = tsne.fit_transform(X)

        for i, (c, target_name) in enumerate(zip(colors, target_names)):
            plt.scatter(X_r[y[i], 0], X_r[y[i], 1],
                    c=c, label=str(i+1))
            f.write(target_name+'\n')
        plt.legend()
        plt.savefig("{}/10crop1.png".format('./'))
tsne.py 文件源码 项目:Ransome-killer 作者: gau820827 项目源码 文件源码 阅读 31 收藏 0 点赞 0 评论 0
def twoDB(Xtrain, Ytrain):
    """Compute a default (2-D) t-SNE result for ``Xtrain`` and pickle it.

    ``Xtrain`` is first transformed with PCA; the result is handed to
    ``tsne_divide`` together with a default ``TSNE`` and ``len(Ytrain)``.
    The output of ``tsne_divide``, ``Xtrain`` and ``Ytrain`` are pickled,
    in that order, to ``packet_tsneBinfo<reduce_number>``.
    """
    reduced = PCA().fit_transform(Xtrain)
    Z = tsne_divide(TSNE(), reduced, len(Ytrain))

    # Persist the t-SNE output followed by the untouched inputs.
    out_name = "packet_tsneBinfo"+str(reduce_number)
    with open(out_name, "wb") as f:
        for payload in (Z, Xtrain, Ytrain):
            pickle.dump(payload, f)
tsne.py 文件源码 项目:Ransome-killer 作者: gau820827 项目源码 文件源码 阅读 28 收藏 0 点赞 0 评论 0
def threeDB(Xtrain, Ytrain):
    """Compute a 3-component t-SNE result for ``Xtrain`` and pickle it.

    ``Xtrain`` is first transformed with PCA; the result is handed to
    ``tsne_divide`` together with a 3-component ``TSNE`` and
    ``len(Ytrain)``.  The output of ``tsne_divide``, ``Xtrain`` and
    ``Ytrain`` are pickled, in that order, to
    ``packet_3DtsneBinfo<reduce_number>``.
    """
    reduced = PCA().fit_transform(Xtrain)
    Z = tsne_divide(TSNE(n_components=3), reduced, len(Ytrain))

    # Persist the t-SNE output followed by the untouched inputs.
    out_name = "packet_3DtsneBinfo"+str(reduce_number)
    with open(out_name, "wb") as f:
        for payload in (Z, Xtrain, Ytrain):
            pickle.dump(payload, f)
tsne.py 文件源码 项目:photinia 作者: XoriieInpottn 项目源码 文件源码 阅读 25 收藏 0 点赞 0 评论 0
def main(flags):
    """Plot a labelled t-SNE projection of pickled word embeddings.

    ``flags.emb_file`` is unpickled into a mapping of word -> vector; the
    first ``flags.plot_num`` entries are embedded in 2-D and passed to
    ``plot_with_labels``.  Returns 0.

    NOTE(review): ``pickle.load`` executes arbitrary code from the file --
    only use embedding files from a trusted source.
    """
    with open(flags.emb_file, 'rb') as f:
        emb_dict = pickle.load(f)

    # Split the mapping into parallel word / vector lists.
    entries = list(emb_dict.items())
    words = [word for word, _ in entries]
    final_embeddings = [vector for _, vector in entries]

    shown = flags.plot_num
    reducer = TSNE(perplexity=30, n_components=2, init='pca', n_iter=5000)
    low_dim_embs = reducer.fit_transform(final_embeddings[:shown])
    plot_with_labels(low_dim_embs, words[:shown])
    return 0
recipe_clustering.py 文件源码 项目:Flavor-Network 作者: lingcheng99 项目源码 文件源码 阅读 28 收藏 0 点赞 0 评论 0
def plot_bokeh(df,sublist,filename):
    """Write an interactive bokeh t-SNE scatter of recipes, coloured by cuisine.

    Rows of ``df`` whose ``'cuisine'`` equals an entry of ``sublist`` are
    stacked in ``sublist`` order.  The ``'cuisine'`` and ``'recipeName'``
    columns are dropped and the remaining columns are embedded with t-SNE on
    precomputed pairwise cosine distances.  The plot is written to
    ``filename`` and shown; hovering a point displays its cuisine and
    recipe name.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``'cuisine'`` and ``'recipeName'`` columns; all other
        columns are used as features.
    sublist : sequence
        Cuisine values to plot.  Must be non-empty, and at most four long
        because the colour palette has four entries.
    filename : str
        Output file handed to bokeh's ``output_file``.
    """
    # lenlist holds cumulative row counts: rows lenlist[i]:lenlist[i+1] of the
    # stacked frame belong to sublist[i].
    lenlist=[0]
    df_sub = df[df['cuisine']==sublist[0]]
    lenlist.append(df_sub.shape[0])
    for cuisine in sublist[1:]:
        temp = df[df['cuisine']==cuisine]
        df_sub = pd.concat([df_sub, temp],axis=0,ignore_index=True)
        lenlist.append(df_sub.shape[0])
    df_X = df_sub.drop(['cuisine','recipeName'],axis=1)
    # A single pre-formatted argument prints the same text on Python 2 and 3.
    print("{} {}".format(df_X.shape, lenlist))

    dist = squareform(pdist(df_X, metric='cosine'))
    # init='random' is spelled out because PCA initialisation (the default in
    # newer scikit-learn releases) is rejected for a precomputed metric.
    tsne = TSNE(metric='precomputed', init='random').fit_transform(dist)
    #cannot use seaborn palette for bokeh
    palette =['red','green','blue','yellow']
    # One colour per row, repeated for as many rows as each cuisine has.
    colors = [palette[i]
              for i in range(len(sublist))
              for _ in range(lenlist[i+1]-lenlist[i])]
    #plot with bokeh
    output_file(filename)
    source = ColumnDataSource(
            data=dict(x=tsne[:,0],y=tsne[:,1],
                cuisine = df_sub['cuisine'],
                recipe = df_sub['recipeName']))

    hover = HoverTool(tooltips=[
                ("cuisine", "@cuisine"),
                ("recipe", "@recipe")])

    p = figure(plot_width=1000, plot_height=1000, tools=[hover],
               title="flavor clustering")

    p.circle('x', 'y', size=10, source=source,fill_color=colors)

    show(p)
visualize.py 文件源码 项目:KATE 作者: hugochan 项目源码 文件源码 阅读 24 收藏 0 点赞 0 评论 0
def plot_tsne(doc_codes, doc_labels, classes_to_visual, save_file):
    """Plot a 2-D t-SNE projection of document codes, one marker per class.

    ``doc_codes`` / ``doc_labels`` are either two dicts keyed by document
    (only documents whose label is in ``classes_to_visual`` are kept) or
    two parallel sequences that are used as given.  The figure is saved to
    ``save_file`` in EPS format and then shown.
    """
    # markers = ["D", "p", "*", "s", "d", "8", "^", "H", "v", ">", "<", "h", "|"]
    markers = ["o", "v", "8", "s", "p", "*", "h", "H", "+", "x", "D"]
    plt.rc('legend',**{'fontsize':30})
    classes_to_visual = list(set(classes_to_visual))
    C = len(classes_to_visual)
    # Repeat the marker list until every class has a marker.
    while C > len(markers):
        markers += markers

    class_ids = {cls: position for position, cls in enumerate(classes_to_visual)}

    if isinstance(doc_codes, dict) and isinstance(doc_labels, dict):
        kept = [(code, doc_labels[doc])
                for doc, code in doc_codes.items()
                if doc_labels[doc] in classes_to_visual]
        codes, labels = zip(*kept)
    else:
        codes, labels = doc_codes, doc_labels

    X = np.r_[list(codes)]
    reducer = TSNE(perplexity=30, n_components=2, init='pca', n_iter=5000)
    np.set_printoptions(suppress=True)
    X = reducer.fit_transform(X)

    plt.figure(figsize=(10, 10), facecolor='white')

    label_array = np.array(labels)
    for cls in classes_to_visual:
        members = label_array == cls
        plt.plot(X[members, 0], X[members, 1],
                 linestyle='None', alpha=1,
                 marker=markers[class_ids[cls]],
                 markersize=10, label=cls)
    plt.legend(loc='upper right', shadow=True)
    # plt.title("tsne")
    # plt.savefig(save_file)
    plt.savefig(save_file, format='eps', dpi=2000)
    plt.show()
visualize.py 文件源码 项目:KATE 作者: hugochan 项目源码 文件源码 阅读 24 收藏 0 点赞 0 评论 0
def plot_tsne_3d(doc_codes, doc_labels, classes_to_visual, save_file, maker_size=None, opaque=None):
    """Plot a 3-D t-SNE projection of document codes, one series per class.

    ``doc_codes`` / ``doc_labels`` are either two dicts keyed by document
    (only documents whose label is in ``classes_to_visual`` are kept) or
    two parallel sequences that are used as given.  ``maker_size`` and
    ``opaque``, when given, are indexed by class position and supply the
    marker size and alpha of each class (defaults: 20 and 1).  The figure
    is saved to ``save_file`` and then shown.
    """
    markers = ["D", "p", "*", "s", "d", "8", "^", "H", "v", ">", "<", "h", "|"]
    plt.rc('legend',**{'fontsize':20})
    colors = ['r', 'b', 'g', 'c', 'm', 'y', 'k']
    C = len(classes_to_visual)
    # Repeat the marker and colour lists until every class is covered.
    while C > len(markers):
        markers += markers
    while C > len(colors):
        colors += colors

    class_ids = {cls: position for position, cls in enumerate(classes_to_visual)}

    if isinstance(doc_codes, dict) and isinstance(doc_labels, dict):
        kept = [(code, doc_labels[doc])
                for doc, code in doc_codes.items()
                if doc_labels[doc] in classes_to_visual]
        codes, labels = zip(*kept)
    else:
        codes, labels = doc_codes, doc_labels

    X = np.r_[list(codes)]
    reducer = TSNE(perplexity=30, n_components=3, init='pca', n_iter=5000)
    np.set_printoptions(suppress=True)
    X = reducer.fit_transform(X)

    fig = plt.figure(figsize=(10, 10), facecolor='white')
    ax = fig.add_subplot(111, projection='3d')

    # legend() cannot use the artists returned by a 3-D scatter, so a 2-D
    # "dummy" line with the same colour and marker is built for each class
    # and those proxies are put in the legend instead.
    scatter_proxy = []
    for i in range(C):
        cls = classes_to_visual[i]
        members = np.array(labels) == cls
        alpha = opaque[i] if opaque else 1
        size = maker_size[i] if maker_size else 20
        ax.scatter(X[members, 0], X[members, 1], X[members, 2],
                   c=colors[i], alpha=alpha, s=size,
                   marker=markers[i], label=cls)
        proxy = mpl.lines.Line2D([0],[0], linestyle="none", c=colors[i],
                                 marker=markers[i], label=cls)
        scatter_proxy.append(proxy)
    ax.legend(scatter_proxy, classes_to_visual, numpoints=1)
    plt.savefig(save_file)
    plt.show()
visualize.py 文件源码 项目:KATE 作者: hugochan 项目源码 文件源码 阅读 26 收藏 0 点赞 0 评论 0
def DBN_plot_tsne(doc_codes, doc_labels, classes_to_visual, save_file):
    """Plot a 2-D t-SNE projection of document codes, one marker per class.

    ``classes_to_visual`` maps each label value to the name shown in the
    legend.  ``doc_codes`` and ``doc_labels`` are parallel sequences.  The
    figure is saved to ``save_file`` and then shown.
    """
    markers = ["o", "v", "8", "s", "p", "*", "h", "H", "+", "x", "D"]

    C = len(classes_to_visual)
    # Repeat the marker list until every class has a marker.
    while C > len(markers):
        markers += markers

    class_ids = {key: position
                 for position, key in enumerate(classes_to_visual.keys())}

    codes, labels = doc_codes, doc_labels

    X = np.r_[list(codes)]
    reducer = TSNE(perplexity=30, n_components=2, init='pca', n_iter=5000)
    np.set_printoptions(suppress=True)
    X = reducer.fit_transform(X)

    plt.figure(figsize=(10, 10), facecolor='white')

    label_array = np.array(labels)
    for key in classes_to_visual.keys():
        members = label_array == key
        plt.plot(X[members, 0], X[members, 1],
                 linestyle='None', alpha=0.6,
                 marker=markers[class_ids[key]],
                 markersize=6, label=classes_to_visual[key])
    plt.legend(loc='upper center', shadow=True)
    plt.title("tsne")
    plt.savefig(save_file)
    plt.show()


问题


面经


文章

微信
公众号

扫码关注公众号