def tsne_cluster_cuisine(df,sublist):
    """Scatter-plot a t-SNE embedding of the recipes of selected cuisines.

    Rows of ``df`` whose ``'cuisine'`` equals an entry of ``sublist`` are
    stacked in ``sublist`` order, the ``'cuisine'`` and ``'recipeName'``
    columns are dropped, and the remaining columns are embedded with t-SNE
    from a precomputed cosine-distance matrix. Each cuisine is drawn in its
    own colour on a new 10x10 inch figure with a legend.

    Args:
        df: DataFrame with ``'cuisine'`` and ``'recipeName'`` columns; all
            other columns are used as features.
        sublist: non-empty sequence of cuisine names to plot.
    """
    # One frame per cuisine, concatenated once instead of growing a frame
    # inside the loop.
    frames = [df[df['cuisine'] == cuisine] for cuisine in sublist]
    df_sub = pd.concat(frames, axis=0, ignore_index=True)

    # lenlist[i]:lenlist[i+1] is the row range of sublist[i] inside df_sub.
    lenlist = [0]
    for frame in frames:
        lenlist.append(lenlist[-1] + frame.shape[0])

    df_X = df_sub.drop(['cuisine', 'recipeName'], axis=1)
    # A single pre-formatted argument prints the same text whether `print`
    # is the Python 2 statement or the Python 3 function.
    print("%s %s" % (df_X.shape, lenlist))

    dist = squareform(pdist(df_X, metric='cosine'))
    tsne = TSNE(metric='precomputed').fit_transform(dist)

    palette = sns.color_palette("hls", len(sublist))
    plt.figure(figsize=(10, 10))
    for i, cuisine in enumerate(sublist):
        start, stop = lenlist[i], lenlist[i + 1]
        # `color=` rather than `c=`: a bare RGB tuple passed as `c` is
        # ambiguous and is treated as per-point values when the group
        # happens to contain exactly three points.
        plt.scatter(tsne[start:stop, 0], tsne[start:stop, 1],
                    color=palette[i], label=cuisine)
    plt.legend()
# Interactive plot with bokeh; set up for four categories, with color palette; pass in df for either ingredient or flavor
# Example source code for the Python class TSNE
def word_cloud(word_embedding_matrix, vocab, s, save_file='scatter.png'):
    """Embed the whole matrix in 2-D with t-SNE and plot the entries of ``s``.

    ``vocab`` is indexed with every element of ``s`` to find that element's
    row in ``word_embedding_matrix``; the selected 2-D points are scattered
    and annotated with the elements of ``s``. ``save_file`` is accepted but
    not used: the figure is shown, not written to disk.
    """
    row_ids = np.array([vocab[entry] for entry in s])

    # Fitting on the full matrix might use a good chunk of RAM.
    projector = TSNE(n_components=2, random_state=0)
    projected = projector.fit_transform(word_embedding_matrix)

    words_vectors = projected[row_ids]
    xs = words_vectors[:, 0]
    ys = words_vectors[:, 1]

    plt.subplots_adjust(bottom = 0.1)
    plt.scatter(xs, ys, marker='o', cmap=plt.get_cmap('Spectral'))

    arrow_style = dict(arrowstyle = '<-', connectionstyle='arc3,rad=0')
    for label, x, y in zip(s, xs, ys):
        plt.annotate(
            label,
            xy=(x, y),
            xytext=(-20, 20),
            textcoords='offset points',
            ha='right',
            va='bottom',
            fontsize=20,
            arrowprops=arrow_style,
        )
    plt.show()
def plot_tsne(z_mu, classes, name):
    """Save t-SNE scatter plots of latent means, adding one class at a time.

    ``z_mu`` and ``classes`` are converted with ``.data.cpu().numpy()``.
    For each column index 0..9 of ``classes``, the rows where that column
    equals 1 are added to a single shared figure, which is then saved as
    ``./vae_results/<name>_embedding_<ic>.png``; the finished figure is
    saved as ``./vae_results/<name>_embedding.png``.

    Args:
        z_mu: latent means, one row per sample.
        classes: per-sample class indicator matrix with at least 10 columns
            (presumably one-hot labels; confirm against the caller).
        name: prefix used in the output file names.
    """
    import matplotlib
    # Select the non-interactive backend before pyplot is imported.
    matplotlib.use('Agg')
    import matplotlib.pyplot as plt
    from sklearn.manifold import TSNE

    model_tsne = TSNE(n_components=2, random_state=0)
    z_states = z_mu.data.cpu().numpy()
    z_embed = model_tsne.fit_transform(z_states)
    classes = classes.data.cpu().numpy()

    fig666 = plt.figure()
    # The title is constant, so set it once instead of on every iteration.
    plt.title("Latent Variable T-SNE per Class")
    for ic in range(10):
        ind_class = classes[:, ic] == 1
        color = plt.cm.Set1(ic)
        plt.scatter(z_embed[ind_class, 0], z_embed[ind_class, 1], s=10, color=color)
        # Snapshot of the figure with classes 0..ic drawn so far.
        fig666.savefig('./vae_results/'+str(name)+'_embedding_'+str(ic)+'.png')
    fig666.savefig('./vae_results/'+str(name)+'_embedding.png')
    # Release the figure so repeated calls do not accumulate open figures.
    plt.close(fig666)
def compute_bulk_smushing(self):
    """Embed the per-group median of ``self.genes`` ('bulk') in 2-D.

    Rows of ``self.genes`` are grouped by the ``self.SAMPLE_MAPPING`` column
    of ``self.cell_metadata``. A cached CSV at
    ``self.bulk_smushed_cache_file`` is returned when its index holds
    exactly the current group keys; otherwise a new t-SNE projection of the
    group medians is computed, written to that file and returned.
    """
    by_sample = self.genes.groupby(self.cell_metadata[self.SAMPLE_MAPPING])
    cache_path = self.bulk_smushed_cache_file

    if os.path.exists(cache_path):
        cached = pd.read_csv(cache_path, names=[0, 1], header=0, index_col=0)
        # The cache is only reused while the set of groups is unchanged.
        if set(cached.index) == set(by_sample.groups):
            return cached

    # Cache missing or stale: recompute the projection.
    bulk = by_sample.median()
    projector = TSNE(random_state=0, perplexity=10, metric='cosine')
    smushed = pd.DataFrame(projector.fit_transform(bulk), index=bulk.index)
    smushed.to_csv(cache_path)
    return smushed
def compute_cell_smushing(self):
    """Compute a 2-D t-SNE embedding of the rows inside every group.

    Rows of ``self.genes`` are grouped by the ``self.SAMPLE_MAPPING`` column
    of ``self.cell_metadata``. The result is a dict mapping each group key
    to a DataFrame of embedded coordinates. Embeddings already present in
    the pickle at ``self.cell_smushed_cache_file`` are reused; if any were
    missing, the completed dict is pickled back to that file.
    """
    by_sample = self.genes.groupby(self.cell_metadata[self.SAMPLE_MAPPING])
    cache_path = self.cell_smushed_cache_file

    if not os.path.exists(cache_path):
        smusheds = {}
    else:
        smusheds = pd.read_pickle(cache_path)
        # Every current group is already cached: nothing to compute.
        if set(by_sample.groups) <= set(smusheds):
            return smusheds

    for plate_name, genes_subset in by_sample:
        if plate_name in smusheds:
            continue
        projector = TSNE(metric='cosine', random_state=0)
        smusheds[plate_name] = pd.DataFrame(
            projector.fit_transform(genes_subset),
            index=genes_subset.index)

    pd.to_pickle(smusheds, cache_path)
    return smusheds
def gen(self):
    """Restore the latest checkpoint and plot a t-SNE view of the embeddings.

    The embedding tensor returned by ``self.embedding()`` is evaluated in a
    fresh session after restoring the newest checkpoint found in the current
    directory. Its first ``self.viz_words`` rows are reduced to 2-D and drawn
    as points labelled through ``self.train_text.int_to_vocab``.
    """
    embedding_op, _ = self.embedding()
    saver = tf.train.Saver()
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        saver.restore(sess, tf.train.latest_checkpoint('.'))
        embedding = sess.run(embedding_op)

    # Keep only the rows that will be visualised.
    data = embedding[:self.viz_words, :]

    # Reduce to two dimensions.
    reducer = TSNE(n_components=2, init='pca', random_state=0)
    embed_tsne = reducer.fit_transform(data)

    # Draw and label each point.
    plt.subplots(figsize=(10, 10))
    for idx in range(self.viz_words):
        x, y = embed_tsne[idx, :]
        plt.scatter(x, y, color='steelblue')
        plt.annotate(self.train_text.int_to_vocab[idx], (x, y), alpha=0.7)
    plt.show()
def main():
    """Plot a labelled 2-D t-SNE projection of the word vectors of a label space.

    Arguments come from ``parse_args()``; the vocabulary is
    ``imdb.get_labels(args.space)`` and each word is mapped to a vector with
    ``lang_db.word_vector``.
    """
    args = parse_args()
    print('Called with args:')
    print(args)

    lang_db = get_language_model(args.lang_name)
    imdb = get_imdb(args.imdb_name)

    # Words of the requested space and one feature vector per word.
    vocabulary = imdb.get_labels(args.space)
    wv = list(map(lang_db.word_vector, vocabulary))

    # Imported by the original code although nothing below uses them.
    from sklearn.metrics.pairwise import cosine_similarity
    from scipy import spatial

    reducer = TSNE(n_components=2, random_state=0)
    np.set_printoptions(suppress=True)
    Y = reducer.fit_transform(wv)

    xs, ys = Y[:, 0], Y[:, 1]
    plt.scatter(xs, ys)
    for label, x, y in zip(vocabulary, xs, ys):
        plt.annotate(label, xy=(x, y), xytext=(0, 0), textcoords='offset points')
    plt.show()
def main(we_file='glove_model_50.npz', w2i_file='glove_word2idx_50.json'):
    """Plot a 2-D t-SNE view of a fixed list of country/nationality words.

    Args:
        we_file: ``.npz`` archive whose ``arr_0`` and ``arr_1`` arrays are
            averaged (``arr_1`` transposed) into one embedding matrix.
        w2i_file: JSON file mapping each word to its row in that matrix.
    """
    words = ['japan', 'japanese', 'england', 'english', 'australia', 'australian', 'china', 'chinese', 'italy', 'italian', 'french', 'france', 'spain', 'spanish']

    with open(w2i_file) as f:
        word2idx = json.load(f)

    # Context manager closes the archive once both arrays have been read.
    with np.load(we_file) as npz:
        W = npz['arr_0']
        V = npz['arr_1']
    We = (W + V.T) / 2

    idx = [word2idx[w] for w in words]

    # t-SNE is fitted on the full matrix; only the chosen rows are plotted.
    tsne = TSNE()
    Z = tsne.fit_transform(We)
    Z = Z[idx]

    plt.scatter(Z[:,0], Z[:,1])
    # After the selection above, row i of Z belongs to words[i].
    for i in range(len(words)):
        # Label passed positionally: the keyword was `s` in old Matplotlib
        # and `text` in new releases, positional works in both.
        plt.annotate(words[i], xy=(Z[i,0], Z[i,1]))
    plt.show()
def take_action(self, parsed_args):
    """Load a data set, embed its features with t-SNE and plot the result.

    Features are flattened to one row per instance. When there are more
    than 50 feature columns they are first reduced with PCA. The embedding
    is handed to ``self.plot_with_labels`` together with the data set.

    Args:
        parsed_args: parsed command-line arguments; ``input``,
            ``perplexity`` and ``learning_rate`` are read here.

    Raises:
        IOError: if ``parsed_args.input`` does not exist.
    """
    if not parsed_args.input.exists():
        raise IOError("failed to open data set at {}".format(parsed_args.input))

    data_set = load(parsed_args.input)
    features = np.reshape(data_set.features, [data_set.num_instances, -1])

    if features.shape[1] > 50:
        self.log.info("applying PCA")
        # PCA cannot extract more components than min(n_samples, n_features);
        # a fixed 200 raised ValueError for inputs with 51..199 columns or
        # fewer than 200 instances.
        n_components = min(200, features.shape[0], features.shape[1])
        pca = PCA(n_components=n_components)
        pca.fit(features)
        features = pca.transform(features)

    self.log.info("computing T-SNE embedding")
    tsne = TSNE(perplexity=parsed_args.perplexity,
                learning_rate=parsed_args.learning_rate,
                verbose=self.app_args.verbose_level)
    embedding = tsne.fit_transform(features)

    self.log.info("plotting embedding")
    self.plot_with_labels(data_set, embedding)
def _plot_proto_symbol_space(coordinates, target_names, name, args):
    """Plot a 2-D t-SNE projection of ``coordinates``, one labelled dot per row.

    Args:
        coordinates: array-like with one row per sample.
        target_names: label of each sample, used for annotations and legend.
        name: base name of the output file.
        args: object with an ``output_dir`` attribute. When it is not None
            the figure is saved to ``<output_dir>/<name>.pdf``; otherwise it
            is shown interactively.
    """
    # Reduce to 2D so that we can plot it.
    coordinates_2d = TSNE().fit_transform(coordinates)
    n_samples = coordinates_2d.shape[0]
    x = coordinates_2d[:, 0]
    y = coordinates_2d[:, 1]
    colors = cm.rainbow(np.linspace(0, 1, n_samples))

    fig = plt.figure(1)
    plt.clf()
    ax = fig.add_subplot(111)

    dots = []
    # `range` instead of the Python 2-only `xrange`.
    for idx in range(n_samples):
        dots.append(ax.plot(x[idx], y[idx], "o", c=colors[idx], markersize=15)[0])
        ax.annotate(target_names[idx], xy=(x[idx], y[idx]))

    lgd = ax.legend(dots, target_names, ncol=4, numpoints=1, loc='upper center', bbox_to_anchor=(0.5,-0.1))
    # Real boolean instead of the string 'on'.
    ax.grid(True)

    if args.output_dir is not None:
        path = os.path.join(args.output_dir, name + '.pdf')
        fig.savefig(path, bbox_extra_artists=(lgd,), bbox_inches='tight')
        # Report only after the file has actually been written.
        print('Saved plot to file "%s"' % path)
    else:
        plt.show()
def embed_or_load_cache(codes, gen, r_idx, batch_size, save_path):
    """Return 2-D t-SNE coordinates for index ``r_idx``, using a CSV cache.

    The cache file is
    ``<save_path>/tsne_plots/embedded_points_r<r_idx, two digits>.csv``: a
    header line followed by one ``x,y`` pair per line. If it exists, its
    points are returned as a float32 array. Otherwise codes are produced by
    ``generate_codes_by_r``, embedded with t-SNE, written to the cache file
    and returned.

    Args:
        codes, gen, batch_size: passed through to ``generate_codes_by_r``.
        r_idx: integer index selecting the codes and the cache file.
        save_path: directory that contains the ``tsne_plots`` folder.
    """
    cache_dir = os.path.join(save_path, 'tsne_plots')
    cache_fp = os.path.join(cache_dir, 'embedded_points_r%02d.csv' % (r_idx,))

    if os.path.isfile(cache_fp):
        # Context manager so the handle is closed deterministically.
        with open(cache_fp) as f:
            lines = f.readlines()
        # The first line is the "#x,y" header.
        rows = [line.strip().split(",") for line in lines[1:]]
        vals = [(float(x), float(y)) for (x, y) in rows]
        return np.array(vals, dtype=np.float32)

    # Make sure the cache directory exists before the long embedding run, so
    # the result is not lost when the file cannot be created afterwards.
    if not os.path.isdir(cache_dir):
        os.makedirs(cache_dir)

    codes_r = generate_codes_by_r(gen, codes, r_idx, batch_size)
    print(codes_r.shape)
    print("Embedding %s via TSNE..." % (str(codes_r.shape),))
    tsne = TSNE(perplexity=40, n_iter=10000, learning_rate=4000, verbose=True)
    codes_r_2d = tsne.fit_transform(codes_r.astype(np.float64))
    print("shape after embedding: %s" % (str(codes_r_2d.shape),))

    with open(cache_fp, "w") as f:
        f.write("#x,y\n")
        # `range` instead of the Python 2-only `xrange`.
        for i in range(codes_r.shape[0]):
            f.write("%.6f,%.6f\n" % (codes_r_2d[i, 0], codes_r_2d[i, 1]))
    return codes_r_2d
def plotInputData(X, Y, title, data_len):
    """Embed ``X`` with t-SNE and plot its two halves as separate groups.

    The first ``data_len // 2`` embedded rows are drawn as green circles and
    the remaining rows as blue triangles; the legend names them "Melanoma"
    and "Benign" in that order. Coordinates are multiplied by 10 for display.

    Args:
        X: feature matrix, one row per sample.
        Y: unused.
        title: plot title.
        data_len: number of samples; the split point is half of it.
    """
    time_start = time.time()
    X = TSNE(n_components=2, verbose=1, perplexity=40, n_iter=300).fit_transform(X)
    print("After Reduction Data Shape : {0}".format(X.shape))
    # Function-call form: valid on Python 3 and prints the same on Python 2.
    print('t-SNE done! Time elapsed: {} seconds'.format(time.time()-time_start))

    # Floor division keeps the slice index an int on Python 3, where `/`
    # yields a float and slicing would raise TypeError.
    half = data_len // 2

    # Main scatter plot and plot annotation
    f, ax = plt.subplots(figsize=(7, 7))
    ax.scatter(X[:half, 0] * 10, X[:half, 1] * 10, marker = 'o', color = 'green', s=30, alpha=0.5)
    ax.scatter(X[half:, 0] * 10, X[half:, 1] * 10, marker = '^', color = 'blue', s=30, alpha=0.5)
    plt.legend(["Melanoma", "Benign"], loc='upper right')
    plt.title(title)
    plt.ylabel('Y')
    plt.xlabel('X')
    plt.show()
def main():
    """Call ``checkSimilarity`` on the word-level word2vec model file."""
    # Alternative model file: "../data/word2vec/character.model"
    checkSimilarity("../data/word2vec_new/word.model", "?")

    # Disabled t-SNE visualisation of the first 1000 embeddings:
    # character_wv_file = '../data/word2vec/character_model.txt'
    # word_wv_file = '../data/word2vec/word_model.txt'
    #
    # embeddings_file = word_wv_file
    # wv, vocabulary = load_embeddings(embeddings_file)
    #
    # tsne = TSNE(n_components=2, random_state=0)
    # np.set_printoptions(suppress=True)
    # Y = tsne.fit_transform(wv[:1000, :])
    #
    # plt.scatter(Y[:, 0], Y[:, 1])
    # for label, x, y in zip(vocabulary, Y[:, 0], Y[:, 1]):
    #     plt.annotate(label, xy=(x, y), xytext=(0, 0), textcoords='offset points')
    # plt.show()
def __init__(self, ax=None, decompose='svd', decompose_by=50, classes=None,
             colors=None, colormap=None, **kwargs):
    """
    Set up the TSNE visualizer.

    ``ax`` and any extra keyword arguments are forwarded to the parent
    initializer; the visual options are stored on the instance and the
    transformer is built from ``decompose``, ``decompose_by`` and the
    remaining keyword arguments.
    """
    super(TSNEVisualizer, self).__init__(ax=ax, **kwargs)

    # Colour options.
    # TODO: Only colors currently works to select the colors of classes.
    self.colormap = colormap
    self.colors = colors

    # Class labels and an instance counter initialised to zero.
    self.n_instances_ = 0
    self.classes_ = classes

    # TSNE pipeline.
    self.transformer_ = self.make_transformer(decompose, decompose_by, kwargs)
def finalize(self, **kwargs):
    """
    Finalize the drawing by adding a title and legend, and removing the
    axis ticks, which do not convey information about TSNE.

    The legend is only added when ``self.classes_`` is set and non-empty.
    """
    # Add a title
    self.set_title(
        "TSNE Projection of {} Documents".format(self.n_instances_)
    )

    # Remove the ticks
    self.ax.set_yticks([])
    self.ax.set_xticks([])

    # Add the legend outside of the figure box.
    # Explicit None/length test: plain truthiness raises ValueError when
    # classes_ is a NumPy array with more than one element.
    if self.classes_ is not None and len(self.classes_) > 0:
        box = self.ax.get_position()
        # Shrink the axes to 80% width to make room for the legend.
        self.ax.set_position([box.x0, box.y0, box.width * 0.8, box.height])
        self.ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))
def tSNE_model(Vt,aid_dict):
    """Embed the columns of ``Vt`` in 2-D and attach anime metadata.

    The transpose of ``Vt`` is reduced with t-SNE, the bare ``x``/``y``
    coordinates are written to ``results\\tsne_svd.csv``, and the name, id
    and score of each item, read from the ``animeData`` table of
    ``user_anime_data.db``, are then added as columns.

    Args:
        Vt: 2-D array; each column is one item to embed.
        aid_dict: mapping from column index of ``Vt`` to anime id.

    Returns:
        DataFrame with columns ``x``, ``y``, ``anime_name``, ``anime_id``
        and ``rating``.
    """
    tsne_model = TSNE(n_components=2, verbose=1, random_state=0)
    tsne_V = tsne_model.fit_transform(np.transpose(Vt))

    # Put data in a pandas dataframe:
    tsne_df = pd.DataFrame(tsne_V, columns=['x', 'y'])
    # Save it (coordinates only; metadata columns are added afterwards):
    tsne_df.to_csv('results\\tsne_svd.csv')

    # Get anime names; the connection is always closed, even on error.
    con = sqlite3.connect('user_anime_data.db')
    try:
        cur = con.cursor()
        rows = cur.execute('SELECT Anime, Name, Score FROM animeData').fetchall()
    finally:
        con.close()
    anime_data = {anime: (anime_name, score) for anime, anime_name, score in rows}

    anime_ids = [aid_dict[x] for x in range(Vt.shape[1])]
    tsne_df['anime_name'] = [anime_data[anime_id][0] for anime_id in anime_ids]
    tsne_df['anime_id'] = anime_ids
    tsne_df['rating'] = [anime_data[anime_id][1] for anime_id in anime_ids]
    return tsne_df
# Plotting the data:
def computeTSNEProjectionOfLatentSpace(X, encoder, display=True):
    """Encode ``X`` with ``encoder.predict`` and embed the codes with t-SNE.

    With ``display`` true the images in ``X`` are drawn at their embedded
    positions and nothing is returned; otherwise the 2-D embedding is
    returned without plotting.
    """
    print("Computing latent space projection...")
    X_encoded = encoder.predict(X)

    print("Computing t-SNE embedding...")
    reducer = manifold.TSNE(n_components=2, init='pca', random_state=0)
    X_tsne = reducer.fit_transform(X_encoded)

    if not display:
        return X_tsne

    # Place each image at its embedded coordinates.
    print("Plotting t-SNE visualization...")
    _, ax = plt.subplots()
    imscatter(X_tsne[:, 0], X_tsne[:, 1], imageData=X, ax=ax, zoom=0.15)
    plt.show()
# Show dataset images with T-sne projection of pixel space
def computeTSNEProjectionOfPixelSpace(X, display=True):
    """Embed the raw pixels of ``X`` with t-SNE.

    ``X`` is reshaped to rows of ``imageSize*imageSize*3`` values before
    fitting. With ``display`` true the images are drawn at their embedded
    positions and nothing is returned; otherwise the 2-D embedding is
    returned without plotting.
    """
    print("Computing t-SNE embedding...")
    flat_pixels = X.reshape([-1,imageSize*imageSize*3])
    reducer = manifold.TSNE(n_components=2, init='pca', random_state=0)
    X_tsne = reducer.fit_transform(flat_pixels)

    if not display:
        return X_tsne

    # Place each image at its embedded coordinates.
    print("Plotting t-SNE visualization...")
    _, ax = plt.subplots()
    imscatter(X_tsne[:, 0], X_tsne[:, 1], imageData=X, ax=ax, zoom=0.15)
    plt.show()
# Reconstructions for samples in dataset
def plot_tsne(images, X, filename):
    """Embed ``X`` with t-SNE and save a plot of ``images`` at those positions.

    The embedding is rescaled per axis to the range spanned by its minimum
    and maximum before drawing; the figure is written to ``filename``.
    """

    def imscatter(x, y, images, ax=None, zoom=1.0):
        """Draw every image at its (x, y) position and return the artists."""
        target = plt.gca() if ax is None else ax
        x, y = np.atleast_1d(x, y)
        artists = []
        for x0, y0, img0 in zip(x, y, images):
            box = AnnotationBbox(OffsetImage(img0, zoom=zoom), (x0, y0),
                                 xycoords='data', frameon=False)
            artists.append(target.add_artist(box))
        # Artists do not extend the data limits on their own.
        target.update_datalim(np.column_stack([x, y]))
        target.autoscale()
        return artists

    def plot_embedding(X, imgs, title=None):
        """Normalise the embedding, mark each point and overlay the images."""
        lower, upper = np.min(X, 0), np.max(X, 0)
        X = (X - lower) / (upper - lower)

        plt.figure()
        ax = plt.subplot(111)
        dot_font = {'weight': 'bold', 'size': 9}
        for row in range(X.shape[0]):
            plt.text(X[row, 0], X[row, 1], ".", fontdict=dot_font)

        if hasattr(offsetbox, 'AnnotationBbox'):
            imscatter(X[:,0], X[:,1], imgs, zoom=0.1, ax=ax)

        plt.xticks([])
        plt.yticks([])
        if title is not None:
            plt.title(title)

    print("Computing t-SNE embedding")
    reducer = manifold.TSNE(n_components=2, init='pca', random_state=0)
    X_tsne = reducer.fit_transform(X)
    plot_embedding(X_tsne, images, "t-SNE embedding of images")
    plt.savefig(filename, bbox_inches='tight')
# Driver
def plot(self, filename="./corpus/model/blog.png"):
    """Save a labelled t-SNE scatter plot of the first 500 embeddings.

    Labels are looked up by inverting ``self.dictionary`` (value -> key) for
    the ids 0..499; the figure is written to ``filename``.
    """
    plot_only=500
    reducer = TSNE(perplexity=30, n_components=2, init="pca", n_iter=5000)
    low_dim_embeddings = reducer.fit_transform(self.final_embeddings[:plot_only, :])

    # Invert the dictionary so that ids map back to their keys.
    reversed_dictionary = {value: key for key, value in self.dictionary.items()}
    labels = [reversed_dictionary[i] for i in range(plot_only)]

    plt.figure(figsize=(18, 18))
    label_style = dict(xytext=(5, 2), textcoords="offset points",
                       ha="right", va="bottom")
    for i, label in enumerate(labels):
        x, y = low_dim_embeddings[i, :]
        plt.scatter(x, y)
        plt.annotate(label, xy=(x, y), **label_style)

    plt.savefig(filename)
    print("Scatter plot was saved to", filename)
def plot_with_labels(low_dim_embs, labels, filename='tsne.png'):
    """Scatter the first ``len(labels)`` rows of ``low_dim_embs`` with labels.

    Each row is drawn as a point and annotated with the label at the same
    position; the 18x18 inch figure is saved to ``filename``. An assertion
    fails when there are more labels than embedding rows.
    """
    assert low_dim_embs.shape[0] >= len(labels), "More labels than embeddings"

    plt.figure(figsize=(18, 18))  # in inches
    label_style = dict(xytext=(5, 2), textcoords='offset points',
                       ha='right', va='bottom')
    for i, label in enumerate(labels):
        x, y = low_dim_embs[i, :]
        plt.scatter(x, y)
        plt.annotate(label, xy=(x, y), **label_style)

    plt.savefig(filename)
# TSNE ????
def plot_embedding2D(node_pos, node_colors=None, di_graph=None):
    """Plot node embeddings in 2-D, optionally on top of a graph structure.

    Args:
        node_pos: array of shape (node_num, embedding_dimension). When the
            dimension exceeds 2 it is reduced to 2 with t-SNE first.
        node_colors: optional per-node colours.
        di_graph: optional networkx graph. When None the positions are drawn
            with a plain scatter plot; otherwise the graph is drawn with
            node ``i`` placed at row ``i`` of ``node_pos``.
    """
    node_num, embedding_dimension = node_pos.shape
    if(embedding_dimension > 2):
        print("Embedding dimension greater than 2, use tSNE to reduce it to 2")
        model = TSNE(n_components=2)
        node_pos = model.fit_transform(node_pos)

    if di_graph is None:
        # plot using plt scatter
        plt.scatter(node_pos[:, 0], node_pos[:, 1], c=node_colors)
        return

    # plot using networkx with edge structure
    pos = {i: node_pos[i, :] for i in range(node_num)}

    # Explicit None/length test: `if node_colors:` raises ValueError when the
    # colours are given as a NumPy array with more than one element.
    has_colors = node_colors is not None and len(node_colors) > 0
    if has_colors:
        # Only node-related options are passed: draw_networkx_nodes has no
        # width / arrows / font_size parameters and current networkx
        # releases reject unknown keyword arguments.
        nx.draw_networkx_nodes(di_graph, pos,
                               node_color=node_colors,
                               node_size=100, alpha=0.8)
    else:
        nx.draw_networkx(di_graph, pos, node_color=node_colors,
                         width=0.1, node_size=300, arrows=False,
                         alpha=0.8, font_size=12)
def main():
    """Plot a t-SNE embedding of image features, one colour per target.

    Features, target names and per-target row selectors come from
    ``getXY(args.image_dir)``. Each target is drawn with its own colour and
    labelled ``1..n`` in the legend; the target names are written in that
    order to ``label.txt`` and the figure is saved as ``.//10crop1.png``.
    """
    X, target_names, y = getXY(args.image_dir)
    # np.asfarray was removed in NumPy 2.0; asarray with a float dtype
    # performs the same conversion.
    X = np.asarray(X, dtype='float')
    colors = cm.gnuplot2(np.linspace(0, 1, len(target_names)))

    # PCA pre-reduction is disabled; t-SNE runs on the raw features.
    X_pca = X
    tsne = TSNE(n_components=2, init='random', random_state=0)
    X_r = tsne.fit_transform(X_pca)

    # Context manager closes label.txt even if plotting raises.
    with open('label.txt','w') as f:
        for i, (c, target_name) in enumerate(zip(colors, target_names)):
            # `color=` rather than `c=`: a single RGBA row passed as `c` is
            # treated as per-point values when the group has four points.
            plt.scatter(X_r[y[i], 0], X_r[y[i], 1],
                        color=c, label=str(i+1))
            f.write(target_name+'\n')

    plt.legend()
    plt.savefig("{}/10crop1.png".format('./'))
def twoDB(Xtrain, Ytrain):
    """Compute a default (2-D) t-SNE projection of the PCA-transformed data and pickle it.

    ``Xtrain`` is transformed with a default PCA and handed to
    ``tsne_divide`` together with a default TSNE and ``len(Ytrain)``. The
    result, ``Xtrain`` and ``Ytrain`` are pickled, in that order, to
    ``packet_tsneBinfo<reduce_number>``.
    """
    reduced = PCA().fit_transform(Xtrain)
    Z = tsne_divide(TSNE(), reduced, len(Ytrain))

    # Three consecutive pickles in one file: projection, inputs, labels.
    out_name = "packet_tsneBinfo"+str(reduce_number)
    with open(out_name, "wb") as f:
        for payload in (Z, Xtrain, Ytrain):
            pickle.dump(payload, f)
def threeDB(Xtrain, Ytrain):
    """Compute a 3-component t-SNE projection of the PCA-transformed data and pickle it.

    ``Xtrain`` is transformed with a default PCA and handed to
    ``tsne_divide`` together with a 3-component TSNE and ``len(Ytrain)``.
    The result, ``Xtrain`` and ``Ytrain`` are pickled, in that order, to
    ``packet_3DtsneBinfo<reduce_number>``.
    """
    reduced = PCA().fit_transform(Xtrain)
    Z = tsne_divide(TSNE(n_components=3), reduced, len(Ytrain))

    # Three consecutive pickles in one file: projection, inputs, labels.
    out_name = "packet_3DtsneBinfo"+str(reduce_number)
    with open(out_name, "wb") as f:
        for payload in (Z, Xtrain, Ytrain):
            pickle.dump(payload, f)
def main(flags):
    """Plot a t-SNE view of the first ``flags.plot_num`` pickled embeddings.

    ``flags.emb_file`` must hold a pickled mapping from word to vector.
    Always returns 0.
    """
    # NOTE(review): pickle.load executes arbitrary code from the file; only
    # use embedding files from a trusted source.
    with open(flags.emb_file, 'rb') as f:
        emb_dict = pickle.load(f)

    # Split the mapping into parallel lists, keeping iteration order.
    pairs = list(emb_dict.items())
    words = [word for word, _ in pairs]
    final_embeddings = [vector for _, vector in pairs]

    limit = flags.plot_num
    reducer = TSNE(perplexity=30, n_components=2, init='pca', n_iter=5000)
    low_dim_embs = reducer.fit_transform(final_embeddings[:limit])
    plot_with_labels(low_dim_embs, words[:limit])
    return 0
def plot_bokeh(df,sublist,filename):
    """Write an interactive bokeh t-SNE plot of the recipes of selected cuisines.

    Rows of ``df`` whose ``'cuisine'`` equals an entry of ``sublist`` are
    stacked in ``sublist`` order and embedded with t-SNE from a precomputed
    cosine-distance matrix of all columns except ``'cuisine'`` and
    ``'recipeName'``. Hovering over a point shows its cuisine and recipe name.

    Args:
        df: DataFrame with ``'cuisine'`` and ``'recipeName'`` columns.
        sublist: non-empty sequence of cuisine names; only four colours are
            defined, so a fifth non-empty cuisine raises IndexError.
        filename: path of the HTML file passed to ``output_file``.
    """
    # lenlist[i]:lenlist[i+1] is the row range of sublist[i] inside df_sub.
    lenlist=[0]
    df_sub = df[df['cuisine']==sublist[0]]
    lenlist.append(df_sub.shape[0])
    for cuisine in sublist[1:]:
        temp = df[df['cuisine']==cuisine]
        df_sub = pd.concat([df_sub, temp],axis=0,ignore_index=True)
        lenlist.append(df_sub.shape[0])

    df_X = df_sub.drop(['cuisine','recipeName'],axis=1)
    # A single pre-formatted argument prints the same text whether `print`
    # is the Python 2 statement or the Python 3 function.
    print("%s %s" % (df_X.shape, lenlist))

    dist = squareform(pdist(df_X, metric='cosine'))
    tsne = TSNE(metric='precomputed').fit_transform(dist)

    # A seaborn palette cannot be used with bokeh, so colours are named.
    palette =['red','green','blue','yellow']
    # One colour entry per row, repeated for the size of each cuisine group.
    colors = [palette[i]
              for i in range(len(sublist))
              for _ in range(lenlist[i+1]-lenlist[i])]

    # Plot with bokeh
    output_file(filename)
    source = ColumnDataSource(
        data=dict(x=tsne[:,0],y=tsne[:,1],
                  cuisine = df_sub['cuisine'],
                  recipe = df_sub['recipeName']))
    hover = HoverTool(tooltips=[
        ("cuisine", "@cuisine"),
        ("recipe", "@recipe")])
    p = figure(plot_width=1000, plot_height=1000, tools=[hover],
               title="flavor clustering")
    p.circle('x', 'y', size=10, source=source,fill_color=colors)
    show(p)
def plot_tsne(doc_codes, doc_labels, classes_to_visual, save_file):
    """Save and show a 2-D t-SNE plot of document codes, one marker per class.

    When ``doc_codes`` and ``doc_labels`` are both dicts keyed by document,
    only documents whose label is in ``classes_to_visual`` are kept;
    otherwise they are used as parallel sequences. The figure is written to
    ``save_file`` in EPS format and then shown.
    """
    markers = ["o", "v", "8", "s", "p", "*", "h", "H", "+", "x", "D"]
    plt.rc('legend', fontsize=30)

    classes_to_visual = list(set(classes_to_visual))
    C = len(classes_to_visual)
    # Double the marker list until there is one marker per class.
    while C > len(markers):
        markers += markers
    class_ids = {cls: position for position, cls in enumerate(classes_to_visual)}

    if isinstance(doc_codes, dict) and isinstance(doc_labels, dict):
        selected = [(code, doc_labels[doc])
                    for doc, code in doc_codes.items()
                    if doc_labels[doc] in classes_to_visual]
        codes, labels = zip(*selected)
    else:
        codes, labels = doc_codes, doc_labels

    X = np.r_[list(codes)]
    reducer = TSNE(perplexity=30, n_components=2, init='pca', n_iter=5000)
    np.set_printoptions(suppress=True)
    X = reducer.fit_transform(X)

    plt.figure(figsize=(10, 10), facecolor='white')
    label_array = np.array(labels)
    for c in classes_to_visual:
        idx = label_array == c
        plt.plot(X[idx, 0], X[idx, 1], linestyle='None', alpha=1,
                 marker=markers[class_ids[c]], markersize=10, label=c)
    plt.legend(loc='upper right', shadow=True)

    plt.savefig(save_file, format='eps', dpi=2000)
    plt.show()
def plot_tsne_3d(doc_codes, doc_labels, classes_to_visual, save_file, maker_size=None, opaque=None):
    """Save and show a 3-D t-SNE plot of document codes, one style per class.

    When ``doc_codes`` and ``doc_labels`` are both dicts keyed by document,
    only documents whose label is in ``classes_to_visual`` are kept;
    otherwise they are used as parallel sequences. ``maker_size`` and
    ``opaque`` optionally give a per-class marker size and alpha (defaults
    20 and 1).
    """
    markers = ["D", "p", "*", "s", "d", "8", "^", "H", "v", ">", "<", "h", "|"]
    plt.rc('legend', fontsize=20)
    colors = ['r', 'b', 'g', 'c', 'm', 'y', 'k']

    C = len(classes_to_visual)
    # Double each style list until it covers every class.
    while C > len(markers):
        markers += markers
    while C > len(colors):
        colors += colors
    class_ids = {cls: position for position, cls in enumerate(classes_to_visual)}

    if isinstance(doc_codes, dict) and isinstance(doc_labels, dict):
        selected = [(code, doc_labels[doc])
                    for doc, code in doc_codes.items()
                    if doc_labels[doc] in classes_to_visual]
        codes, labels = zip(*selected)
    else:
        codes, labels = doc_codes, doc_labels

    X = np.r_[list(codes)]
    reducer = TSNE(perplexity=30, n_components=3, init='pca', n_iter=5000)
    np.set_printoptions(suppress=True)
    X = reducer.fit_transform(X)

    fig = plt.figure(figsize=(10, 10), facecolor='white')
    ax = fig.add_subplot(111, projection='3d')

    # The legend cannot use what a 3-D scatter returns, so a dummy 2-D line
    # with the same colour and marker is created for every class instead.
    scatter_proxy = []
    for i in range(C):
        cls = classes_to_visual[i]
        idx = np.array(labels) == cls
        point_alpha = opaque[i] if opaque else 1
        point_size = maker_size[i] if maker_size else 20
        ax.scatter(X[idx, 0], X[idx, 1], X[idx, 2], c=colors[i],
                   alpha=point_alpha, s=point_size, marker=markers[i], label=cls)
        scatter_proxy.append(
            mpl.lines.Line2D([0],[0], linestyle="none", c=colors[i],
                             marker=markers[i], label=cls))
    ax.legend(scatter_proxy, classes_to_visual, numpoints=1)

    plt.savefig(save_file)
    plt.show()
def DBN_plot_tsne(doc_codes, doc_labels, classes_to_visual, save_file):
    """Save and show a 2-D t-SNE plot of document codes, one marker per class.

    ``classes_to_visual`` is a mapping: its keys are compared with the
    entries of ``doc_labels`` and its values are used as legend labels.
    """
    markers = ["o", "v", "8", "s", "p", "*", "h", "H", "+", "x", "D"]
    C = len(classes_to_visual)
    # Double the marker list until there is one marker per class.
    while C > len(markers):
        markers += markers
    class_ids = {cls: position for position, cls in enumerate(classes_to_visual.keys())}

    codes, labels = doc_codes, doc_labels
    X = np.r_[list(codes)]
    reducer = TSNE(perplexity=30, n_components=2, init='pca', n_iter=5000)
    np.set_printoptions(suppress=True)
    X = reducer.fit_transform(X)

    plt.figure(figsize=(10, 10), facecolor='white')
    label_array = np.array(labels)
    for c in classes_to_visual.keys():
        idx = label_array == c
        plt.plot(X[idx, 0], X[idx, 1], linestyle='None', alpha=0.6,
                 marker=markers[class_ids[c]], markersize=6,
                 label=classes_to_visual[c])
    plt.legend(loc='upper center', shadow=True)
    plt.title("tsne")

    plt.savefig(save_file)
    plt.show()