python类TSNE的实例源码

visualize.py 文件源码 项目:KATE 作者: hugochan 项目源码 文件源码 阅读 23 收藏 0 点赞 0 评论 0
def reuters_visualize_tsne(doc_codes, doc_labels, classes_to_visual, save_file):
    """
        Project document codes to 2D with t-SNE and plot them, one marker per class.

        Only documents that carry at least one label listed in
        ``classes_to_visual`` are embedded. The figure is saved to
        ``save_file`` and then shown.

        @param doc_codes: dict mapping a document key to its code vector.
        @param doc_labels: mapping from document key to an iterable of labels.
        @param classes_to_visual: dict mapping a class key to its legend text.
        @param save_file: destination handed to ``plt.savefig``.
        @raise ValueError: when no document carries any of the requested classes.
    """

    markers = ["o", "v", "8", "s", "p", "*", "h", "H", "+", "x", "D"]

    # Repeat the marker list until every class can be given a marker.
    C = len(classes_to_visual)
    while C > len(markers):
        markers += markers

    class_names = classes_to_visual.keys()
    class_ids = dict(zip(class_names, range(C)))
    class_names = set(class_names)
    selected = [(code, doc_labels[doc]) for doc, code in doc_codes.items()
                if class_names.intersection(set(doc_labels[doc]))]
    if not selected:
        # zip(*[]) would otherwise fail with an opaque unpacking error.
        raise ValueError("no document carries any of the classes to visualize")
    codes, labels = zip(*selected)

    X = np.r_[list(codes)]
    tsne = TSNE(perplexity=30, n_components=2, init='pca', n_iter=5000)
    np.set_printoptions(suppress=True)
    X = tsne.fit_transform(X)

    plt.figure(figsize=(10, 10), facecolor='white')

    for c in classes_to_visual.keys():
        idx = get_indices(labels, c)
        plt.plot(X[idx, 0], X[idx, 1], linestyle='None', alpha=0.6, marker=markers[class_ids[c]],
                        markersize=6, label=classes_to_visual[c])
    plt.legend(loc='upper center', shadow=True)
    plt.title("tsne")
    plt.savefig(save_file)
    plt.show()
common.py 文件源码 项目:histwords 作者: williamleif 项目源码 文件源码 阅读 19 收藏 0 点赞 0 评论 0
def fit_tsne(values):
    """Embed ``values`` into two dimensions with t-SNE.

    Returns ``None`` when ``values`` is ``None`` or has length zero; otherwise
    returns the result of ``TSNE.fit_transform`` on ``np.array(values)``.
    The elapsed wall-clock time of the fit is printed.
    """
    # Use len() rather than truthiness: ``not ndarray`` is ambiguous and
    # raises for arrays with more than one element.
    if values is None or len(values) == 0:
        return None

    start = time.time()
    mat = np.array(values)
    model = TSNE(n_components=2, random_state=0, learning_rate=150, init='pca')
    fitted = model.fit_transform(mat)
    # Single parenthesised argument prints identically on Python 2 and 3.
    print("FIT TSNE TOOK %s" % (time.time() - start))

    return fitted
tsne.py 文件源码 项目:agent-trainer 作者: lopespm 项目源码 文件源码 阅读 27 收藏 0 点赞 0 评论 0
def save_visualization_to_image(self, inputs, outputs, folder_path_for_result_image):
        """Embed the flattened inputs in 2D with t-SNE and pass them to the plot helper.

        Each element of ``inputs`` is flattened with ``reshape(-1,)`` before the
        fit. The image path handed to ``_tsne_plot_embedding`` is
        ``folder_path_for_result_image`` joined with "t-SNE.png".
        """
        print("Computing t-SNE embedding")
        flattened = np.array([state.reshape(-1, ) for state in inputs])
        embedder = manifold.TSNE(n_components=2, init='pca', random_state=0)
        embedded = embedder.fit_transform(flattened)
        image_path = os.path.join(folder_path_for_result_image, "t-SNE.png")
        self._tsne_plot_embedding(x=embedded, y=outputs, inputs=inputs,
                                  path_result_image=image_path)
sample_latent.py 文件源码 项目:keras-molecules 作者: maxhodak 项目源码 文件源码 阅读 24 收藏 0 点赞 0 评论 0
def visualize_latent_rep(args, model, x_latent):
    """Scatter-plot the latent vectors, then their t-SNE projection.

    When ``args.use_pca`` is set the vectors are first reduced with PCA to
    ``args.pca_components`` dimensions. Two figures are shown: the first two
    columns of the (possibly PCA-reduced) input, and the first two columns of
    the t-SNE output. ``model`` is not used in this function.
    """
    settings = (args.use_pca, args.pca_components, args.tsne_components,
                args.tsne_perplexity, args.tsne_lr)
    print("pca_on=%r pca_comp=%d tsne_comp=%d tsne_perplexity=%f tsne_lr=%f" % settings)

    if args.use_pca:
        x_latent = PCA(n_components=args.pca_components).fit_transform(x_latent)

    figure(figsize=(6, 6))
    scatter(x_latent[:, 0], x_latent[:, 1], marker='.')
    show()

    tsne_options = dict(n_components=args.tsne_components,
                        perplexity=args.tsne_perplexity,
                        learning_rate=args.tsne_lr,
                        n_iter=args.tsne_iterations,
                        verbose=4)
    x_latent_proj = TSNE(**tsne_options).fit_transform(x_latent)
    # Drop the local reference to the input before plotting the projection.
    del x_latent

    figure(figsize=(6, 6))
    scatter(x_latent_proj[:, 0], x_latent_proj[:, 1], marker='.')
    show()
dls_funct.py 文件源码 项目:dcss_single_cell 作者: srmcc 项目源码 文件源码 阅读 34 收藏 0 点赞 0 评论 0
def tSNE_pairwise(D):
    """
    Return the 2D t-SNE embedding of D, which is passed as a precomputed metric.

    From clustering_on_transcript_compatibility_counts, see github for MIT license
    """
    embedder = manifold.TSNE(
        n_components=2,
        random_state=0,
        metric='precomputed',
        n_iter=2000,
        verbose=1,
    )
    return embedder.fit_transform(D)

# Plot function with Zeisel's colors corresponding to labels
embedding.py 文件源码 项目:Dragonfly 作者: duaneloh 项目源码 文件源码 阅读 36 收藏 0 点赞 0 评论 0
def do_embedding(self, event=None):
        """Fit the embedding method selected in ``self.method`` to the converted frames.

        If ``self.parent.converted`` is None it is loaded from
        ``converted.npy`` in the parent's output folder. The fitted
        ``embedding_`` is stored on ``self.embed`` and ``self.embed_plot``,
        then the histogram and embedding plot are refreshed. The classes
        frame is added only the first time an embedding is made.
        """
        if self.parent.converted is None:
            #self.conversion.convert_frames()
            self.parent.converted = np.load(self.parent.output_folder+'/converted.npy') #FIXME For debugging
        data = self.parent.converted

        # Combo-box index -> constructor; built lazily so only the chosen one runs.
        factories = {
            0: lambda: manifold.SpectralEmbedding(n_components=4, n_jobs=-1),
            1: lambda: manifold.Isomap(n_components=4, n_jobs=-1),
            2: lambda: manifold.LocallyLinearEmbedding(n_components=4, n_jobs=-1, n_neighbors=20, method='modified'),
            3: lambda: manifold.LocallyLinearEmbedding(n_components=4, n_jobs=-1, n_neighbors=20, method='hessian', eigen_solver='dense'),
            4: lambda: manifold.MDS(n_components=4, n_jobs=-1),
            5: lambda: manifold.TSNE(n_components=3, init='pca'),
        }
        selected = self.method.currentIndex()
        print('Doing %s' % self.method.currentText())
        build = factories.get(selected)
        if build is not None:
            # An index outside the table leaves any existing embedder in place.
            self.embedder = build()

        self.embedder.fit(data)
        self.embed = self.embedder.embedding_
        self.embed_plot = self.embed

        self.gen_hist()
        self.plot_embedding()
        if not self.embedded:
            self.add_classes_frame()
        self.embedded = True
utility.py 文件源码 项目:smiles-neural-network 作者: PMitura 项目源码 文件源码 阅读 24 收藏 0 点赞 0 评论 0
def visualize2D(model, layerID, inputData, labels, withTime = False):
    """
    Save a 2D t-SNE scatter plot of one layer's outputs, coloured by label.

    The output of model.layers[layerID] is evaluated on inputData through a
    backend function. Each row is flattened into a list (one level deeper
    when withTime is set), the rows are embedded with TSNE, and the plot is
    saved as SCATTER_NAME under cc.cfg['plots']['dir'].
    """
    print("\n  Generating output distribution for layer {}".format(layerID))
    layerFn = K.function([model.layers[0].input], [model.layers[layerID].output])
    batches = layerFn([inputData])

    rows = []
    for batch in batches:
        for line in batch:
            if withTime:
                # each entry of the row is itself iterable: unroll it
                rows.append([deepVal for val in line for deepVal in val])
            else:
                rows.append([val for val in line])
    npvalues = np.array(rows)

    embedder = TSNE(n_components = 2, random_state = 0)
    points = embedder.fit_transform(npvalues)

    # labels become a third column so they can drive the colour map
    labelColumn = np.zeros((len(labels), 1))
    for i in range(len(labels)):
        labelColumn[i][0] = labels[i]
    frame = pd.DataFrame(np.hstack((points, labelColumn)), columns = ('a', 'b', 'c'))

    axes = frame.plot.scatter(x = 'a', y = 'b', c = 'c', cmap = 'plasma')
    axes.get_figure().savefig('{}/{}'.format(cc.cfg['plots']['dir'],SCATTER_NAME))

    print("  ...done")
views.py 文件源码 项目:texta 作者: texta-tk 项目源码 文件源码 阅读 26 收藏 0 点赞 0 评论 0
def index(request):
    """Render the conceptualiser page for the requesting user.

    Redirects to ``URL_PREFIX + '/'`` when the session has no 'model' entry.
    Otherwise each of the user's lexicons is given a ``size`` attribute
    holding its word count, and the template is rendered with the lexicons
    and the list of available methods.
    """
    if 'model' not in request.session:
        return HttpResponseRedirect(URL_PREFIX + '/')
    template = loader.get_template('conceptualiser.html')

    lexicons = []
    for lexicon in Lexicon.objects.all().filter(author=request.user):
        lexicon.size = Word.objects.all().filter(lexicon=lexicon.id).count()
        lexicons.append(lexicon)

    context = {
        'STATIC_URL': STATIC_URL,
        'lexicons': lexicons,
        'methods': ["PCA", "TSNE", "MDS"],
    }
    return HttpResponse(template.render(context, request))
tf_glove.py 文件源码 项目:FYP-AutoTextSum 作者: MrRexZ 项目源码 文件源码 阅读 24 收藏 0 点赞 0 评论 0
def generate_tsne(self, path="glove/model/model", size=(100, 100), word_count=1000, embeddings=None):
        """Project the embeddings to 2D with t-SNE and pass them to the label plotter.

        Falls back to ``self.embeddings`` when ``embeddings`` is None. All
        embedding values are projected; the labels are the first
        ``word_count`` entries of ``self.words``. Returns whatever
        ``_plot_with_labels`` returns.
        """
        from sklearn.manifold import TSNE
        source = self.embeddings if embeddings is None else embeddings
        projector = TSNE(perplexity=30, n_components=2, init='pca', n_iter=5000)
        vectors = numpy.asarray(list(source.values()))
        low_dim_embs = projector.fit_transform(vectors)
        return _plot_with_labels(low_dim_embs, self.words[:word_count], path, size)
utils.py 文件源码 项目:Vulcan 作者: rfratila 项目源码 文件源码 阅读 19 收藏 0 点赞 0 评论 0
def display_tsne(train_x, train_y, label_map=None):
    """
    t-distributed Stochastic Neighbor Embedding (t-SNE) visualization [1].

    [1]: Maaten, L., Hinton, G. (2008). Visualizing Data using t-SNE.
            JMLR 9(Nov):2579--2605.

    Args:
        train_x: 2d numpy array (batch, features) of samples
        train_y: numpy array of labels for samples; compared element-wise
            against each unique label to select the points of one class
        label_map: a dict of labelled (str(int), string) key, value pairs;
            defaults to mapping every unique label to its own string form

    Raises:
        ValueError: if label_map is given but is not a dict
    """
    # Validate first so a bad argument fails before the expensive t-SNE fit.
    if label_map is not None and not isinstance(label_map, dict):
        raise ValueError('label_map must be a dict of a key'
                         ' mapping to its true label')
    tsne = TSNE(n_components=2, random_state=0)
    x_transform = tsne.fit_transform(train_x)
    y_unique = np.unique(train_y)
    if label_map is None:
        label_map = {str(i): str(i) for i in y_unique}
    colours = plt.cm.rainbow(np.linspace(0, 1, len(y_unique)))
    plt.figure()
    for index, cl in enumerate(y_unique):
        members = train_y == cl
        plt.scatter(x=x_transform[members, 0],
                    y=x_transform[members, 1],
                    s=100,
                    c=colours[index],
                    marker='o',
                    edgecolors='none',
                    label=label_map[str(cl)])
    plt.xlabel('X in t-SNE')
    plt.ylabel('Y in t-SNE')
    plt.legend(loc='upper right')
    plt.title('t-SNE visualization')
    # `block` must be passed by keyword; current matplotlib rejects it positionally.
    plt.show(block=False)
knock99.py 文件源码 项目:100knock2016 作者: tmu-nlp 项目源码 文件源码 阅读 20 收藏 0 点赞 0 评论 0
def main():
    """Plot a 2D t-SNE embedding of the country data and annotate each point with its name."""
    countries = dictdata(getCountrydict())
    embedder = TSNE(n_components=2)
    points = embedder.fit_transform(countries.getData())

    _, axes = plt.subplots()
    axes.scatter(points[:, 0], points[:, 1], s=1)
    for row, name in enumerate(countries.getName()):
        axes.annotate(name, xy=(points[row, 0], points[row, 1]), size=10)
    plt.show()
pretraining.py 文件源码 项目:pandora 作者: mikekestemont 项目源码 文件源码 阅读 25 收藏 0 点赞 0 评论 0
def plot_mfi(self, outputfile='embeddings.pdf', nb_clusters=8, weights='NA'):
        """Plot a 2D t-SNE map of the embeddings of the items in ``self.mfi``.

        Items missing from ``self.w2v_model`` are skipped. Each remaining item
        is drawn as its own name, coloured by the cluster it falls into under
        Ward agglomerative clustering of the 2D coordinates. The figure is
        written to ``outputfile``.

        Args:
            outputfile: path the figure is saved to.
            nb_clusters: number of clusters used for colouring.
            weights: not used in this method.
        """
        # Filter once so that names and embedding rows stay aligned; pairing
        # the full list with the filtered matrix mislabels every point after
        # the first missing item.
        labels = [w for w in self.mfi if w in self.w2v_model]
        X = np.asarray([self.w2v_model[w] for w in labels], dtype='float32')
        # dimension reduction:
        tsne = TSNE(n_components=2)
        coor = tsne.fit_transform(X)

        plt.clf()
        sns.set_style('dark')
        # Use pyplot directly: the ``sns.plt`` alias is gone from current seaborn.
        plt.rcParams['axes.linewidth'] = 0.4
        _fig, ax1 = plt.subplots()

        # invisible markers first, so the axes limits cover all points:
        x1, x2 = coor[:,0], coor[:,1]
        ax1.scatter(x1, x2, 100, edgecolors='none', facecolors='none')
        # clustering on top (add some colouring):
        clustering = AgglomerativeClustering(linkage='ward',
                            affinity='euclidean', n_clusters=nb_clusters)
        clustering.fit(coor)
        # add names:
        for x, y, name, cluster_label in zip(x1, x2, labels, clustering.labels_):
            # ``nipy_spectral`` is the current name of the old ``spectral`` map.
            ax1.text(x, y, name, ha='center', va="center",
                     color=plt.cm.nipy_spectral(cluster_label / 10.),
                     fontdict={'family': 'Arial', 'size': 8})
        # control aesthetics:
        ax1.set_xlabel('')
        ax1.set_ylabel('')
        ax1.set_xticklabels([])
        ax1.set_xticks([])
        ax1.set_yticklabels([])
        ax1.set_yticks([])
        plt.savefig(outputfile, bbox_inches=0)
w2v.py 文件源码 项目:wtfrnn 作者: juliakreutzer 项目源码 文件源码 阅读 24 收藏 0 点赞 0 评论 0
def main(_):
  """Train a word2vec model, save it, and plot a t-SNE map of its embeddings.

  Exits with status 1 when --train_data or --save_path is missing. After
  training, the checkpoint is saved, the embedding matrix and both vocabulary
  mappings are pickled, and a labelled 2D t-SNE plot of all words is written.
  An interactive shell is started when FLAGS.interactive is set.
  """
  if not FLAGS.train_data or not FLAGS.save_path:
    print("--train_data and --save_path must be specified.")
    sys.exit(1)
  opts = Options()
  with tf.Graph().as_default(), tf.Session() as session:
    model = Word2Vec(opts, session)
    for _ in xrange(opts.epochs_to_train):
      model.train()  # Process one epoch
    # Perform a final save.
    model.saver.save(session,
                     os.path.join(opts.save_path, opts.name+".model.base.ckpt"),
                     global_step=model.global_step)
    model.nearby(['Switzerland'])

    tsne = TSNE(perplexity=30, n_components=2, init='pca', n_iter=5000)
    plot_only = len(model._id2word)
    final_embeddings = model._emb.eval(session)
    print(final_embeddings)
    # Write each pickle through a context manager so the file is flushed and
    # closed even if dumping fails.
    dumps = (
        ("embeddings/"+opts.name+".emb.base.pkl", final_embeddings),
        ("dicts/"+opts.name+".w2i.base.pkl", model._word2id),
        ("dicts/"+opts.name+".i2w.base.pkl", model._id2word),
    )
    for target, payload in dumps:
      with open(target, "wb") as handle:
        pkl.dump(payload, handle)
    low_dim_embs = tsne.fit_transform(final_embeddings[:plot_only,:])
    print(low_dim_embs)
    labels = [model._id2word[i] for i in xrange(plot_only)]
    plot_with_labels(low_dim_embs, labels,"plots/"+opts.name+".tsne.base.png")

    if FLAGS.interactive:
      # E.g.,
      # [0]: model.analogy('france', 'paris', 'russia')
      # [1]: model.nearby(['proton', 'elephant', 'maxwell'])
      _start_shell(locals())
modules.py 文件源码 项目:visualize-tsne 作者: YontiLevin 项目源码 文件源码 阅读 18 收藏 0 点赞 0 评论 0
def calculate_tsne(self):
        """Run SVD, then compute 2D t-SNE vectors with the configured backend.

        The result is stored on ``self.tsne_vectors``. The scikit-learn
        implementation is used when ``self.method == SKLEARN``; otherwise
        ``MATTENS_TSNE`` is called. Both use a perplexity of 40.
        """
        self._perform_svd()
        if self.method != SKLEARN:
            self.tsne_vectors = MATTENS_TSNE(self.data_vectors, no_dims=2,
                                             initial_dims=self.data_vectors.shape[1],
                                             perplexity=40.0)
            return
        embedder = TSNE(n_components=2, perplexity=40, verbose=2)
        self.tsne_vectors = embedder.fit_transform(self.data_vectors)
tsne_books.py 文件源码 项目:lazyprogrammer 作者: inhwane 项目源码 文件源码 阅读 21 收藏 0 点赞 0 评论 0
def main():
    """Plot a t-SNE embedding of the module-level ``X`` with one word label per row.

    Reads ``X``, ``D`` and ``index_word_map`` from the enclosing module: the
    first ``D`` rows of the embedding are annotated with ``index_word_map[i]``.
    """
    tsne = TSNE(perplexity=40)
    Z = tsne.fit_transform(X)
    plt.scatter(Z[:,0], Z[:,1])
    # range() runs on both Python 2 and 3, unlike xrange().
    for i in range(D):
        # Pass the text positionally: matplotlib renamed the `s` keyword to `text`.
        plt.annotate(index_word_map[i], xy=(Z[i,0], Z[i,1]))
    plt.show()
tsne_donut.py 文件源码 项目:lazyprogrammer 作者: inhwane 项目源码 文件源码 阅读 20 收藏 0 点赞 0 评论 0
def main():
    """Show the donut data set, then its t-SNE embedding (perplexity 40)."""
    X, Y = get_donut_data()
    look = dict(s=100, c=Y, alpha=0.5)

    plt.scatter(X[:,0], X[:,1], **look)
    plt.show()

    Z = TSNE(perplexity=40).fit_transform(X)
    plt.scatter(Z[:,0], Z[:,1], **look)
    plt.show()
tsne_mnist.py 文件源码 项目:lazyprogrammer 作者: inhwane 项目源码 文件源码 阅读 21 收藏 0 点赞 0 评论 0
def main():
    """Show a t-SNE embedding of the first 1000 Kaggle MNIST training samples."""
    sample_size = 1000
    Xtrain, Ytrain, _, _ = getKaggleMNIST()
    samples, targets = Xtrain[:sample_size], Ytrain[:sample_size]

    embedded = TSNE().fit_transform(samples)
    plt.scatter(embedded[:,0], embedded[:,1], s=100, c=targets, alpha=0.5)
    plt.show()
unsupervised.py 文件源码 项目:lazyprogrammer 作者: inhwane 项目源码 文件源码 阅读 28 收藏 0 点赞 0 评论 0
def main():
    """Fit a DBN on Kaggle MNIST and compare t-SNE and PCA views of its output.

    Three figures are shown in turn: t-SNE of the first 600 rows of the DBN
    output, t-SNE of the first 600 raw training rows, and PCA of the full
    DBN output.
    """
    Xtrain, Ytrain, Xtest, Ytest = getKaggleMNIST()
    dbn = DBN([1000, 750, 500], UnsupervisedModel=AutoEncoder)
    # dbn = DBN([1000, 750, 500, 10])
    output = dbn.fit(Xtrain, pretrain_epochs=2)
    # A single formatted argument prints the same text on Python 2 and 3;
    # the bare print statement is a syntax error on Python 3.
    print("output.shape %s" % (output.shape,))

    # sample before using t-SNE because it requires lots of RAM
    sample_size = 600
    tsne = TSNE()
    reduced = tsne.fit_transform(output[:sample_size])
    plt.scatter(reduced[:,0], reduced[:,1], s=100, c=Ytrain[:sample_size], alpha=0.5)
    plt.title("t-SNE visualization")
    plt.show()

    # t-SNE on raw data
    reduced = tsne.fit_transform(Xtrain[:sample_size])
    plt.scatter(reduced[:,0], reduced[:,1], s=100, c=Ytrain[:sample_size], alpha=0.5)
    plt.title("t-SNE visualization")
    plt.show()

    pca = PCA()
    reduced = pca.fit_transform(output)
    plt.scatter(reduced[:,0], reduced[:,1], s=100, c=Ytrain, alpha=0.5)
    plt.title("PCA visualization")
    plt.show()
tsne_xor.py 文件源码 项目:lazyprogrammer 作者: inhwane 项目源码 文件源码 阅读 20 收藏 0 点赞 0 评论 0
def main():
    """Show the XOR data set, then its t-SNE embedding (perplexity 40)."""
    X, Y = get_xor_data()
    look = dict(s=100, c=Y, alpha=0.5)

    plt.scatter(X[:,0], X[:,1], **look)
    plt.show()

    Z = TSNE(perplexity=40).fit_transform(X)
    plt.scatter(Z[:,0], Z[:,1], **look)
    plt.show()
visualize_embeddings_tsne.py 文件源码 项目:laughter 作者: ganesh-srinivas 项目源码 文件源码 阅读 20 收藏 0 点赞 0 评论 0
def main():
    """Embed the pickled audio embeddings with t-SNE and write them out as CSV.

    Reads two pickled dicts keyed by the same clip keys (embeddings per clip
    and label indices per clip), embeds every embedding of the first
    EXAMPLES_SIZE_LIMIT clips, and writes one line per embedding to
    OUTPUT_FILENAME: the coordinates followed by the clip's label indices.
    The header has one ``dimK`` column per component plus ``labels``.
    """
    # NOTE(review): unpickling runs arbitrary code; only load trusted files.
    with open(AUDIO_EMBEDDINGS_DICT, 'rb') as fh:
        audio_embeddings_dict = cPickle.load(fh)
    with open(AUDIO_LABEL_INDICES_DICT, 'rb') as fh:
        audio_label_indices_dict = cPickle.load(fh)

    X = []
    ids = []
    # list(d) rather than d.keys()[...]: dict views cannot be sliced on Python 3.
    for k in list(audio_embeddings_dict)[:EXAMPLES_SIZE_LIMIT]:
        for embedding in audio_embeddings_dict[k]:
            X.append(embedding)
            ids.append(audio_label_indices_dict[k])

    # Apply t-SNE
    tsne = TSNE(n_components=N_COMPONENTS, perplexity=PERPLEXITY,
                learning_rate=LEARNING_RATE, n_iter=N_ITER)
    Xtransformed = tsne.fit_transform(X)

    # Build the header from the component count so any N_COMPONENTS works;
    # for 2 and 3 this yields the same headers as before.
    columns = ["dim%d" % (d + 1) for d in range(N_COMPONENTS)] + ["labels"]
    output_lines = [",".join(columns)]

    # One line per embedding: coordinates, then the class IDs of its clip.
    for point, label_ids in zip(Xtransformed, ids):
        output_lines.append(",".join(str(j) for j in point) +
                            "," + ",".join(str(k) for k in label_ids))

    with open(OUTPUT_FILENAME, 'w') as fh:
        fh.write("\n".join(output_lines))


问题


面经


文章

微信
公众号

扫码关注公众号