def reuters_visualize_tsne(doc_codes, doc_labels, classes_to_visual, save_file):
    """
    Project document codes to 2D with t-SNE and plot one marker series per class.

    Only documents that carry at least one label found in ``classes_to_visual``
    are embedded. The figure is saved to ``save_file`` and then shown.

    @param doc_codes: dict mapping a document key to its code; the selected
        codes are stacked into the matrix handed to t-SNE.
    @param doc_labels: dict mapping a document key to an iterable of labels.
    @param classes_to_visual: dict mapping a class label to its legend text.
    @param save_file: destination passed to ``plt.savefig``.
    @raise ValueError: if no document has a label in ``classes_to_visual``.
    """
    markers = ["o", "v", "8", "s", "p", "*", "h", "H", "+", "x", "D"]
    n_classes = len(classes_to_visual)
    # Repeat the marker list until every class can be assigned a marker.
    while n_classes > len(markers):
        markers += markers

    class_names = list(classes_to_visual.keys())
    class_ids = dict(zip(class_names, range(n_classes)))
    wanted = set(class_names)

    selected = [(code, doc_labels[doc]) for doc, code in doc_codes.items()
                if wanted.intersection(doc_labels[doc])]
    if not selected:
        # zip(*[]) would otherwise fail with an opaque unpacking error.
        raise ValueError("no document carries any of the classes to visualize")
    codes, labels = zip(*selected)

    X = np.r_[list(codes)]
    tsne = TSNE(perplexity=30, n_components=2, init='pca', n_iter=5000)
    np.set_printoptions(suppress=True)
    X = tsne.fit_transform(X)

    plt.figure(figsize=(10, 10), facecolor='white')
    for c in class_names:
        idx = get_indices(labels, c)
        plt.plot(X[idx, 0], X[idx, 1], linestyle='None', alpha=0.6,
                 marker=markers[class_ids[c]],
                 markersize=6, label=classes_to_visual[c])
    plt.legend(loc='upper center', shadow=True)
    plt.title("tsne")
    plt.savefig(save_file)
    plt.show()
# Example source code for the Python class TSNE
def fit_tsne(values):
    """Fit a 2-component t-SNE on ``values`` and return the embedded points.

    Args:
        values: sequence of rows or array; converted with ``np.array``.

    Returns:
        The result of ``TSNE.fit_transform``, or ``None`` when ``values`` is
        ``None`` or contains no elements.
    """
    if values is None:
        return None
    start = time.time()
    mat = np.array(values)
    # Check the array size rather than truthiness: ``not values`` is
    # ambiguous (and raises) for multi-element NumPy arrays.
    if mat.size == 0:
        return None
    model = TSNE(n_components=2, random_state=0, learning_rate=150, init='pca')
    fitted = model.fit_transform(mat)
    # Single-argument print() behaves the same on Python 2 and Python 3.
    print("FIT TSNE TOOK %s" % (time.time() - start))
    return fitted
def save_visualization_to_image(self, inputs, outputs, folder_path_for_result_image):
    """Embed ``inputs`` in 2D with t-SNE and pass the result to the plot helper.

    Every element of ``inputs`` is flattened with ``reshape(-1)`` and the
    flattened rows are stacked into the matrix given to t-SNE. ``outputs`` is
    forwarded unchanged as ``y``. The target image is ``t-SNE.png`` inside
    ``folder_path_for_result_image``.
    """
    print("Computing t-SNE embedding")
    flattened = np.array([sample.reshape(-1) for sample in inputs])
    embedder = manifold.TSNE(n_components=2, init='pca', random_state=0)
    embedding = embedder.fit_transform(flattened)
    image_path = os.path.join(folder_path_for_result_image, "t-SNE.png")
    self._tsne_plot_embedding(
        x=embedding,
        y=outputs,
        inputs=inputs,
        path_result_image=image_path,
    )
def visualize_latent_rep(args, model, x_latent):
    """Scatter-plot a latent representation before and after t-SNE.

    Prints the PCA/t-SNE settings read from ``args``, optionally reduces
    ``x_latent`` with PCA, shows its first two columns, then shows the first
    two columns of its t-SNE projection.

    Args:
        args: object providing ``use_pca``, ``pca_components``,
            ``tsne_components``, ``tsne_perplexity``, ``tsne_lr`` and
            ``tsne_iterations``.
        model: not used by this function.
        x_latent: 2D array of latent vectors (indexed as ``[:, 0]``/``[:, 1]``).
    """
    settings = (
        args.use_pca,
        args.pca_components,
        args.tsne_components,
        args.tsne_perplexity,
        args.tsne_lr,
    )
    print("pca_on=%r pca_comp=%d tsne_comp=%d tsne_perplexity=%f tsne_lr=%f" % settings)

    if args.use_pca:
        x_latent = PCA(n_components=args.pca_components).fit_transform(x_latent)

    # First view: the (possibly PCA-reduced) latent vectors themselves.
    figure(figsize=(6, 6))
    scatter(x_latent[:, 0], x_latent[:, 1], marker='.')
    show()

    projector = TSNE(
        n_components=args.tsne_components,
        perplexity=args.tsne_perplexity,
        learning_rate=args.tsne_lr,
        n_iter=args.tsne_iterations,
        verbose=4,
    )
    x_latent_proj = projector.fit_transform(x_latent)
    # Drop the local reference to the input once the projection exists.
    del x_latent

    # Second view: the t-SNE projection.
    figure(figsize=(6, 6))
    scatter(x_latent_proj[:, 0], x_latent_proj[:, 1], marker='.')
    show()
def tSNE_pairwise(D):
    """
    Return a 2D t-SNE embedding computed from ``D`` with ``metric='precomputed'``.

    From clustering_on_transcript_compatibility_counts, see github for MIT license
    """
    embedder = manifold.TSNE(
        n_components=2,
        random_state=0,
        metric='precomputed',
        n_iter=2000,
        verbose=1,
    )
    return embedder.fit_transform(D)
# Plot function with Zeisel's colors corresponding to labels
def do_embedding(self, event=None):
    """Fit the embedding method selected in ``self.method`` on the converted data.

    The fitted embedding is stored in ``self.embed`` / ``self.embed_plot``,
    the histogram and embedding plot are refreshed, and the classes frame is
    added the first time an embedding is produced. ``event`` is not used.
    """
    converted = self.parent.converted
    if converted is None:
        #self.conversion.convert_frames()
        # FIXME For debugging: load a previously saved conversion instead.
        self.parent.converted = np.load(self.parent.output_folder+'/converted.npy')
        converted = self.parent.converted

    method_ind = self.method.currentIndex()
    print('Doing %s' % self.method.currentText())

    # Lazy factories so only the selected embedder is constructed.
    factories = {
        0: lambda: manifold.SpectralEmbedding(n_components=4, n_jobs=-1),
        1: lambda: manifold.Isomap(n_components=4, n_jobs=-1),
        2: lambda: manifold.LocallyLinearEmbedding(
            n_components=4, n_jobs=-1, n_neighbors=20, method='modified'),
        3: lambda: manifold.LocallyLinearEmbedding(
            n_components=4, n_jobs=-1, n_neighbors=20, method='hessian',
            eigen_solver='dense'),
        4: lambda: manifold.MDS(n_components=4, n_jobs=-1),
        5: lambda: manifold.TSNE(n_components=3, init='pca'),
    }
    make_embedder = factories.get(method_ind)
    # An unknown index leaves any previously set embedder in place.
    if make_embedder is not None:
        self.embedder = make_embedder()

    self.embedder.fit(converted)
    self.embed = self.embedder.embedding_
    self.embed_plot = self.embed

    self.gen_hist()
    self.plot_embedding()
    if not self.embedded:
        self.add_classes_frame()
    self.embedded = True
def visualize2D(model, layerID, inputData, labels, withTime = False):
    """Plot a 2D t-SNE view of one layer's output, coloured by label.

    The output of ``model.layers[layerID]`` is evaluated for ``inputData``,
    each sample is turned into a flat list (two levels deep when ``withTime``
    is true, one level otherwise), and the rows are embedded with t-SNE. The
    scatter plot is saved as ``SCATTER_NAME`` in the configured plots
    directory.
    """
    print("\n Generating output distribution for layer {}".format(layerID))
    layer_fn = K.function([model.layers[0].input], [model.layers[layerID].output])
    activations = layer_fn([inputData])

    rows = []
    for batch in activations:
        for sample in batch:
            if withTime:
                # Flatten the extra (time) level into a single row.
                rows.append([value for step in sample for value in step])
            else:
                rows.append(list(sample))
    features = np.array(rows)

    reducer = TSNE(n_components = 2, random_state = 0)
    points = reducer.fit_transform(features)

    # Append the labels as a third column so pandas can colour by it.
    label_column = np.zeros((len(labels), 1))
    for i in range(len(labels)):
        label_column[i, 0] = labels[i]
    frame = pd.DataFrame(np.hstack((points, label_column)), columns = ('a', 'b', 'c'))

    axes = frame.plot.scatter(x = 'a', y = 'b', c = 'c', cmap = 'plasma')
    axes.get_figure().savefig('{}/{}'.format(cc.cfg['plots']['dir'],SCATTER_NAME))
    print(" ...done")
def index(request):
    """Render the conceptualiser page with the current user's lexicons.

    Redirects to ``URL_PREFIX + '/'`` when the session has no ``'model'`` key.
    Each lexicon authored by ``request.user`` gets a ``size`` attribute set to
    the number of ``Word`` rows that reference it.
    """
    if 'model' not in request.session:
        return HttpResponseRedirect(URL_PREFIX + '/')

    template = loader.get_template('conceptualiser.html')

    lexicons = []
    for lexicon in Lexicon.objects.all().filter(author=request.user):
        lexicon.size = Word.objects.all().filter(lexicon=lexicon.id).count()
        lexicons.append(lexicon)

    context = {
        'STATIC_URL': STATIC_URL,
        'lexicons': lexicons,
        'methods': ["PCA", "TSNE", "MDS"],
    }
    return HttpResponse(template.render(context, request))
def generate_tsne(self, path="glove/model/model", size=(100, 100), word_count=1000, embeddings=None):
    """Embed word vectors in 2D with t-SNE and plot them with word labels.

    Args:
        path: forwarded to ``_plot_with_labels``.
        size: forwarded to ``_plot_with_labels``.
        word_count: number of leading entries of ``self.words`` used as
            labels; all values of ``embeddings`` are embedded regardless.
        embeddings: mapping whose values are the vectors to embed; defaults
            to ``self.embeddings``.

    Returns:
        Whatever ``_plot_with_labels`` returns.
    """
    source = self.embeddings if embeddings is None else embeddings

    from sklearn.manifold import TSNE
    reducer = TSNE(perplexity=30, n_components=2, init='pca', n_iter=5000)
    vectors = numpy.asarray(list(source.values()))
    projected = reducer.fit_transform(vectors)
    word_labels = self.words[:word_count]
    return _plot_with_labels(projected, word_labels, path, size)
def display_tsne(train_x, train_y, label_map=None):
    """
    t-distributed Stochastic Neighbor Embedding (t-SNE) visualization [1].

    [1]: Maaten, L., Hinton, G. (2008). Visualizing Data using t-SNE.
         JMLR 9(Nov):2579--2605.

    Args:
        train_x: 2d numpy array (batch, features) of samples
        train_y: array-like of labels for the samples; ``train_y == label``
            is used as a row mask on the embedding, so presumably one label
            per sample (1d, length batch) -- TODO confirm against callers
        label_map: optional dict mapping ``str(label)`` to the legend text;
            defaults to each label's own string form

    Raises:
        ValueError: if ``label_map`` is given but is not a dict
    """
    # Validate before the slow embedding so bad arguments fail immediately.
    if label_map is not None and not isinstance(label_map, dict):
        raise ValueError('label_map must be a dict of a key'
                         ' mapping to its true label')

    tsne = TSNE(n_components=2, random_state=0)
    x_transform = tsne.fit_transform(train_x)

    # An array is needed so that ``train_y == cl`` yields an element-wise mask.
    train_y = np.asarray(train_y)
    y_unique = np.unique(train_y)
    if label_map is None:
        label_map = {str(i): str(i) for i in y_unique}

    colours = plt.cm.rainbow(np.linspace(0, 1, len(y_unique)))

    plt.figure()
    for index, cl in enumerate(y_unique):
        mask = train_y == cl
        plt.scatter(x=x_transform[mask, 0],
                    y=x_transform[mask, 1],
                    s=100,
                    # Single-row 2D array: one RGBA colour for all points.
                    # A bare 1D RGBA array is ambiguous with per-point values.
                    c=colours[index:index + 1],
                    marker='o',
                    edgecolors='none',
                    label=label_map[str(cl)])

    plt.xlabel('X in t-SNE')
    plt.ylabel('Y in t-SNE')
    plt.legend(loc='upper right')
    plt.title('t-SNE visualization')
    # ``block`` must be passed by keyword in current Matplotlib.
    plt.show(block=False)
def main():
    """Embed the country data in 2D with t-SNE and annotate each point by name."""
    embedder = TSNE(n_components=2)
    countries = dictdata(getCountrydict())
    coords = embedder.fit_transform(countries.getData())

    _, axes = plt.subplots()
    axes.scatter(coords[:, 0], coords[:, 1], s=1)
    for row, name in enumerate(countries.getName()):
        point = (coords[row, 0], coords[row, 1])
        axes.annotate(name, xy=point, size=10)
    plt.show()
def plot_mfi(self, outputfile='embeddings.pdf', nb_clusters=8, weights='NA'):
    """Plot the most frequent items as words placed by a 2D t-SNE embedding.

    Items of ``self.mfi`` that are present in ``self.w2v_model`` are embedded
    with t-SNE, clustered with Ward agglomerative clustering on the 2D
    coordinates, and drawn as text coloured by cluster. The figure is saved
    to ``outputfile``.

    Args:
        outputfile: path passed to ``plt.savefig``.
        nb_clusters: number of clusters for the agglomerative clustering.
        weights: accepted for interface compatibility; not used here.
    """
    # Filter the words once so labels stay aligned with the embedded vectors;
    # using the unfiltered list mislabels points after the first missing word.
    labels = [w for w in self.mfi if w in self.w2v_model]
    X = np.asarray([self.w2v_model[w] for w in labels], dtype='float32')

    # dimension reduction:
    tsne = TSNE(n_components=2)
    coor = tsne.fit_transform(X)

    plt.clf()
    sns.set_style('dark')
    # Use pyplot directly: the ``sns.plt`` alias is gone from newer seaborn.
    plt.rcParams['axes.linewidth'] = 0.4
    _, ax1 = plt.subplots()

    # first plot slices (invisible markers, so the axes limits fit the data):
    x1, x2 = coor[:, 0], coor[:, 1]
    ax1.scatter(x1, x2, 100, edgecolors='none', facecolors='none')

    # clustering on top (add some colouring); Ward linkage uses the default
    # euclidean distance, so the deprecated ``affinity`` argument is omitted.
    clustering = AgglomerativeClustering(linkage='ward', n_clusters=nb_clusters)
    clustering.fit(coor)

    # ``spectral`` was renamed ``nipy_spectral``; fall back for old Matplotlib.
    cmap = getattr(plt.cm, 'nipy_spectral', None) or plt.cm.spectral

    # add names:
    for x, y, name, cluster_label in zip(x1, x2, labels, clustering.labels_):
        ax1.text(x, y, name, ha='center', va="center",
                 color=cmap(cluster_label / 10.),
                 fontdict={'family': 'Arial', 'size': 8})

    # control aesthetics:
    ax1.set_xlabel('')
    ax1.set_ylabel('')
    ax1.set_xticklabels([])
    ax1.set_xticks([])
    ax1.set_yticklabels([])
    ax1.set_yticks([])
    plt.savefig(outputfile, bbox_inches=0)
def _dump_pickle(obj, path):
    """Pickle ``obj`` to ``path``, closing the file even if dumping fails."""
    with open(path, "wb") as fh:
        pkl.dump(obj, fh)


def main(_):
    """Train a word2vec model.

    After training and a final checkpoint save, the embeddings and both
    vocabulary mappings are pickled, a 2D t-SNE projection of the embeddings
    is plotted, and an interactive shell is started when
    ``FLAGS.interactive`` is set. Exits with status 1 if ``--train_data`` or
    ``--save_path`` is missing.
    """
    if not FLAGS.train_data or not FLAGS.save_path:
        print("--train_data and --save_path must be specified.")
        sys.exit(1)
    opts = Options()
    with tf.Graph().as_default(), tf.Session() as session:
        model = Word2Vec(opts, session)
        # range() works on both Python 2 and 3 (xrange is Python 2 only).
        for _ in range(opts.epochs_to_train):
            model.train()  # Process one epoch
        # Perform a final save.
        model.saver.save(session,
                         os.path.join(opts.save_path, opts.name+".model.base.ckpt"),
                         global_step=model.global_step)
        model.nearby(['Switzerland'])

        tsne = TSNE(perplexity=30, n_components=2, init='pca', n_iter=5000)
        plot_only = len(model._id2word)
        final_embeddings = model._emb.eval(session)
        print(final_embeddings)

        # The helper closes each file; passing open(...) inline leaked handles.
        _dump_pickle(final_embeddings, "embeddings/"+opts.name+".emb.base.pkl")
        _dump_pickle(model._word2id, "dicts/"+opts.name+".w2i.base.pkl")
        _dump_pickle(model._id2word, "dicts/"+opts.name+".i2w.base.pkl")

        low_dim_embs = tsne.fit_transform(final_embeddings[:plot_only,:])
        print(low_dim_embs)
        labels = [model._id2word[i] for i in range(plot_only)]
        plot_with_labels(low_dim_embs, labels,"plots/"+opts.name+".tsne.base.png")

        if FLAGS.interactive:
            # E.g.,
            # [0]: model.analogy('france', 'paris', 'russia')
            # [1]: model.nearby(['proton', 'elephant', 'maxwell'])
            _start_shell(locals())
def calculate_tsne(self):
    """Run SVD preprocessing, then store a 2D t-SNE of ``self.data_vectors``.

    scikit-learn's ``TSNE`` is used when ``self.method == SKLEARN``;
    otherwise ``MATTENS_TSNE`` is called. Both use perplexity 40, and the
    result is written to ``self.tsne_vectors``.
    """
    self._perform_svd()

    if self.method == SKLEARN:
        reducer = TSNE(n_components=2, perplexity=40, verbose=2)
        self.tsne_vectors = reducer.fit_transform(self.data_vectors)
        return

    self.tsne_vectors = MATTENS_TSNE(
        self.data_vectors,
        no_dims=2,
        initial_dims=self.data_vectors.shape[1],
        perplexity=40.0,
    )
def main():
    """Embed ``X`` with t-SNE and annotate the first ``D`` points with words.

    NOTE(review): ``X``, ``D`` and ``index_word_map`` are not defined inside
    this function; presumably they are module-level names -- confirm they are
    set before this is called.
    """
    tsne = TSNE(perplexity=40)
    Z = tsne.fit_transform(X)
    plt.scatter(Z[:, 0], Z[:, 1])
    # range() works on both Python 2 and 3 (xrange is Python 2 only).
    for i in range(D):
        # Pass the text positionally: its keyword was ``s`` in older
        # Matplotlib and ``text`` in newer releases.
        plt.annotate(index_word_map[i], xy=(Z[i, 0], Z[i, 1]))
    plt.show()
def main():
    """Show the donut data, then its t-SNE embedding, both coloured by class."""
    points, classes = get_donut_data()
    look = dict(s=100, c=classes, alpha=0.5)

    # Original feature space.
    plt.scatter(points[:, 0], points[:, 1], **look)
    plt.show()

    # t-SNE embedding of the same points.
    embedded = TSNE(perplexity=40).fit_transform(points)
    plt.scatter(embedded[:, 0], embedded[:, 1], **look)
    plt.show()
def main():
    """Show a t-SNE embedding of the first 1000 Kaggle MNIST training rows."""
    Xtrain, Ytrain, _, _ = getKaggleMNIST()

    sample_size = 1000
    samples = Xtrain[:sample_size]
    targets = Ytrain[:sample_size]

    embedded = TSNE().fit_transform(samples)
    plt.scatter(embedded[:, 0], embedded[:, 1], s=100, c=targets, alpha=0.5)
    plt.show()
def main():
    """Fit a DBN on Kaggle MNIST and compare t-SNE and PCA views of it.

    Three scatter plots are shown in turn, coloured by training label:
    t-SNE of the first ``sample_size`` rows of the DBN output, t-SNE of the
    same number of raw training rows, and PCA of the full DBN output.
    """
    Xtrain, Ytrain, Xtest, Ytest = getKaggleMNIST()
    dbn = DBN([1000, 750, 500], UnsupervisedModel=AutoEncoder)
    output = dbn.fit(Xtrain, pretrain_epochs=2)
    # Single-argument print() gives the same text on Python 2 and Python 3.
    print("output.shape %s" % (output.shape,))

    # sample before using t-SNE because it requires lots of RAM
    sample_size = 600
    tsne = TSNE()
    reduced = tsne.fit_transform(output[:sample_size])
    plt.scatter(reduced[:, 0], reduced[:, 1], s=100, c=Ytrain[:sample_size], alpha=0.5)
    plt.title("t-SNE visualization")
    plt.show()

    # t-SNE on raw data
    reduced = tsne.fit_transform(Xtrain[:sample_size])
    plt.scatter(reduced[:, 0], reduced[:, 1], s=100, c=Ytrain[:sample_size], alpha=0.5)
    plt.title("t-SNE visualization")
    plt.show()

    # PCA on the full DBN output
    pca = PCA()
    reduced = pca.fit_transform(output)
    plt.scatter(reduced[:, 0], reduced[:, 1], s=100, c=Ytrain, alpha=0.5)
    plt.title("PCA visualization")
    plt.show()
def main():
    """Show the XOR data, then its t-SNE embedding, both coloured by class."""
    points, classes = get_xor_data()
    look = dict(s=100, c=classes, alpha=0.5)

    # Original feature space.
    plt.scatter(points[:, 0], points[:, 1], **look)
    plt.show()

    # t-SNE embedding of the same points.
    embedded = TSNE(perplexity=40).fit_transform(points)
    plt.scatter(embedded[:, 0], embedded[:, 1], **look)
    plt.show()
def main():
    """Run t-SNE on pickled audio embeddings and write the result as CSV.

    Embeddings are read for at most ``EXAMPLES_SIZE_LIMIT`` keys. Each output
    row holds the ``N_COMPONENTS`` t-SNE coordinates of one embedding followed
    by the label indices of the clip it came from. The header is
    ``dim1,...,dimN,labels`` and the file is written to ``OUTPUT_FILENAME``.
    """
    # NOTE: unpickling can execute arbitrary code; only load trusted files.
    with open(AUDIO_EMBEDDINGS_DICT, 'rb') as fh:
        audio_embeddings_dict = cPickle.load(fh)
    with open(AUDIO_LABEL_INDICES_DICT, 'rb') as fh:
        audio_label_indices_dict = cPickle.load(fh)

    X = []
    ids = []
    # list(d) rather than d.keys(): dict views cannot be sliced on Python 3.
    for k in list(audio_embeddings_dict)[:EXAMPLES_SIZE_LIMIT]:
        for embedding in audio_embeddings_dict[k]:
            X.append(embedding)
            ids.append(audio_label_indices_dict[k])

    # Apply t-SNE
    tsne = TSNE(n_components=N_COMPONENTS, perplexity=PERPLEXITY,
                learning_rate=LEARNING_RATE, n_iter=N_ITER)
    Xtransformed = tsne.fit_transform(X)

    # save the embeddings along with the list of class IDs associated with
    # the clip from which it was taken.
    # Header for output file, built for any N_COMPONENTS so it is always
    # defined (identical to the former literals for 2 and 3).
    header = ["dim%d" % (d + 1) for d in range(N_COMPONENTS)] + ["labels"]
    output_lines = [",".join(header)]
    for coords, label_ids in zip(Xtransformed, ids):
        output_lines.append(",".join([str(j) for j in coords]) +
                            "," + ",".join([str(label) for label in label_ids]))

    output_file_contents = "\n".join(output_lines)
    with open(OUTPUT_FILENAME, 'w') as fh:
        fh.write(output_file_contents)