def apply_lens(df, lens='pca', dist='euclidean', n_dim=2, **kwargs):
    """
    Compute the image of a data set under a lens (dimensionality reduction).

    input: N x F dataframe of observations
    output: N x n_dim image of input data under lens function

    Parameters:
        df: dataframe of observations; its index is reused for the result.
        lens: 'pca', 'mds' or 'neighbor' (SpectralEmbedding).
        dist: 'euclidean' or 'correlation'; PCA only accepts 'euclidean'.
        n_dim: dimension of the image; only 2 is accepted.
        **kwargs: forwarded unchanged to the selected estimator's constructor.

    Raises:
        ValueError: if n_dim, dist or lens is not one of the supported values,
            or if PCA is requested with a non-euclidean metric.
    """
    # Raising a plain string is itself a TypeError in Python 3, which hid the
    # intended message; raise a real exception type with the same text.
    if n_dim != 2:
        raise ValueError('error: image of data set must be two-dimensional')
    if dist not in ['euclidean', 'correlation']:
        raise ValueError('error: only euclidean and correlation distance metrics are supported')
    if lens == 'pca' and dist != 'euclidean':
        raise ValueError('error: PCA requires the use of euclidean distance metric')

    if lens == 'pca':
        image = decomposition.PCA(n_components=n_dim, **kwargs).fit_transform(df)
    elif lens == 'mds':
        # NOTE(review): D is handed to MDS with the estimator's default
        # settings; confirm whether callers are expected to request
        # precomputed dissimilarities through kwargs.
        D = metrics.pairwise.pairwise_distances(df, metric=dist)
        image = manifold.MDS(n_components=n_dim, **kwargs).fit_transform(D)
    elif lens == 'neighbor':
        # NOTE(review): same question as for MDS - D is passed with the
        # estimator's default affinity unless kwargs overrides it.
        D = metrics.pairwise.pairwise_distances(df, metric=dist)
        image = manifold.SpectralEmbedding(n_components=n_dim, **kwargs).fit_transform(D)
    else:
        raise ValueError('error: only PCA, MDS, neighborhood lenses are supported')

    # Keep the observations' original index on the embedded coordinates.
    return pd.DataFrame(image, df.index)
# Source-code examples using the Python class SpectralEmbedding()
def do_embedding(self, event=None):
    """Fit the embedder chosen in ``self.method`` and refresh the plots.

    Reads the data from ``self.parent.converted`` (loading
    ``converted.npy`` from the parent's output folder when it is None),
    fits the selected manifold estimator on it, stores the result in
    ``self.embed`` / ``self.embed_plot``, then calls ``gen_hist`` and
    ``plot_embedding``. On the first successful run it also calls
    ``add_classes_frame`` and sets ``self.embedded``.

    ``event`` is accepted but not used.
    """
    if self.parent.converted is None:
        #self.conversion.convert_frames()
        # FIXME For debugging: load a previously saved conversion from disk.
        self.parent.converted = np.load(self.parent.output_folder+'/converted.npy')
    data = self.parent.converted

    selected = self.method.currentIndex()
    print('Doing %s' % self.method.currentText())

    # Lazy factories keyed by selector index, so only the chosen estimator
    # is constructed.
    factories = {
        0: lambda: manifold.SpectralEmbedding(n_components=4, n_jobs=-1),
        1: lambda: manifold.Isomap(n_components=4, n_jobs=-1),
        2: lambda: manifold.LocallyLinearEmbedding(
            n_components=4, n_jobs=-1, n_neighbors=20, method='modified'),
        3: lambda: manifold.LocallyLinearEmbedding(
            n_components=4, n_jobs=-1, n_neighbors=20, method='hessian',
            eigen_solver='dense'),
        4: lambda: manifold.MDS(n_components=4, n_jobs=-1),
        5: lambda: manifold.TSNE(n_components=3, init='pca'),
    }
    build = factories.get(selected)
    if build is not None:
        # An index outside the table leaves the current self.embedder as is.
        self.embedder = build()

    self.embedder.fit(data)
    self.embed = self.embedder.embedding_
    self.embed_plot = self.embed

    self.gen_hist()
    self.plot_embedding()

    if self.embedded:
        return
    # First embedding only: build the classes frame once.
    self.add_classes_frame()
    self.embedded = True
def cluster_scatter_plot(similarity_file):
    """Show an interactive scatter plot of models coloured by cluster.

    Loads JSON from *similarity_file* and reads its ``cluster2doc``,
    ``doc2cluster``, ``model_names`` and ``similarity_matrix`` entries.
    The similarity matrix is embedded with ``SpectralEmbedding`` using a
    precomputed affinity; the first two embedding coordinates are plotted,
    one point per row, and the figure is displayed through ``mpld3`` with a
    tooltip label per point.

    Parameters:
        similarity_file: path of the JSON file to read (UTF-8).
    """
    def get_cmap(N):
        '''Returns a function that maps each index in 0, 1, ... N-1 to a distinct
        RGBA color.'''
        color_norm = colors.Normalize(vmin=0, vmax=N-1)
        scalar_map = cmx.ScalarMappable(norm=color_norm, cmap='hsv')
        def map_index_to_rgb_color(index):
            return scalar_map.to_rgba(index)
        return map_index_to_rgb_color

    # Pass the encoding by keyword: the third positional parameter of the
    # built-in open() is `buffering`, so a positional 'utf-8' raises TypeError.
    with open(similarity_file, 'r', encoding='utf-8') as f:
        similarity_data = json.load(f)

    # One colour per cluster.
    cmap = get_cmap(len(similarity_data['cluster2doc']))
    # Compiled once; splits off a trailing " (<digits>)-iter..." suffix.
    suffix_pattern = re.compile(r"\s\([0-9]*\)-iter.*", re.IGNORECASE)

    labels = []
    point_colors = []
    for model_name in similarity_data['model_names']:
        # doc2cluster is looked up by the file's base name without extension.
        model_name = os.path.splitext(os.path.basename(model_name))[0]
        cluster_label = similarity_data['doc2cluster'][model_name]
        point_colors.append(cmap(cluster_label))
        labels.append(suffix_pattern.split(model_name, 1)[0])

    embeddings = SpectralEmbedding(affinity='precomputed').fit_transform(
        np.array(similarity_data['similarity_matrix']))

    fig, ax = plt.subplots()
    # A scalar marker size applies to every point; the previous fixed
    # 100-element size array only matched data sets with exactly 100 points.
    scatter = ax.scatter(embeddings[:, 0], embeddings[:, 1], c=point_colors, s=100)
    tooltip = mpld3.plugins.PointLabelTooltip(scatter, labels=labels)
    mpld3.plugins.connect(fig, tooltip)
    mpld3.show()
# plt.scatter(tsne_embeddings[20:40, 0], tsne_embeddings[20:40, 1], c='b')
# for label, x, y in zip(labels, tsne_embeddings[:, 0], tsne_embeddings[:, 1]):
# plt.annotate(
# label,
# xy = (x, y),
# # textcoords = 'offset points',
# bbox = dict(boxstyle = 'round,pad=0.5', fc = 'yellow', alpha = 0.5))
# plt.show()