def test_isomap_simple_grid():
    """Check that Isomap reproduces grid distances when every point is a neighbor.

    A 5x5 integer lattice is embedded into two components with
    ``n_neighbors`` set to the number of points minus one; the distance
    graph of the embedding is compared against that of the input.
    """
    side = 5
    n_points = side ** 2
    k = n_points - 1

    # Regular lattice in 2D, so n_components equals the input dimension.
    grid = np.array(list(product(range(side), repeat=2)))

    # Reference: distances from each point to all the others.
    expected = neighbors.kneighbors_graph(grid, k, mode='distance').toarray()

    # Same iteration order as nesting path_methods inside eigen_solvers.
    for eigen_solver, path_method in product(eigen_solvers, path_methods):
        model = manifold.Isomap(
            n_neighbors=k,
            n_components=2,
            eigen_solver=eigen_solver,
            path_method=path_method,
        )
        model.fit(grid)
        embedded = neighbors.kneighbors_graph(
            model.embedding_, k, mode='distance'
        ).toarray()
        assert_array_almost_equal(expected, embedded)
# Example source code using the Python class Isomap()
def test_transform():
    """Check that ``Isomap.transform`` re-embeds slightly noisy points nearby.

    An Isomap model is fitted on an S-curve sample, then the same points
    with small Gaussian noise added are passed through ``transform``. The
    RMS difference between both embeddings must stay below twice the
    noise scale.
    """
    n_samples = 200
    # The original passed this value positionally, where it landed in
    # ``n_neighbors``; it is named and passed by keyword here.
    n_neighbors = 10
    noise_scale = 0.01

    # Create S-curve dataset (the labels are not needed).
    X, _ = datasets.make_s_curve(n_samples=n_samples, random_state=0)

    # Compute isomap embedding
    iso = manifold.Isomap(n_neighbors=n_neighbors, n_components=2)
    X_iso = iso.fit_transform(X)

    # Re-embed a noisy version of the points
    rng = np.random.RandomState(0)
    noise = noise_scale * rng.randn(*X.shape)
    X_iso2 = iso.transform(X + noise)

    # Make sure the rms error on re-embedding is comparable to noise_scale
    assert_less(np.sqrt(np.mean((X_iso - X_iso2) ** 2)), 2 * noise_scale)
def outofsample_extensions(method='linear-regression'):
    """Evaluate an out-of-sample extension of an Isomap embedding.

    A 10-component Isomap is fitted on the first 5000 training samples
    returned by ``load_mnist()``. A projection from the raw inputs to that
    embedding is then learned and scored with ``evaluate_svm``; the result
    is printed.

    Parameters
    ----------
    method : str
        ``'linear-regression'`` for the ``LinearRegression`` baseline, or
        ``'c-ISOMAP-10d'`` / ``'c-ISOMAP-20d'`` for a ``LinearSEF`` with 10
        or 20 output dimensions.

    Raises
    ------
    ValueError
        If ``method`` is not one of the supported names. The check runs
        before any data is loaded so a typo fails immediately instead of
        after the Isomap fit.
    """
    sef_dims = {'c-ISOMAP-10d': 10, 'c-ISOMAP-20d': 20}
    if method != 'linear-regression' and method not in sef_dims:
        raise ValueError(
            "Unknown method {!r}; expected 'linear-regression', "
            "'c-ISOMAP-10d' or 'c-ISOMAP-20d'".format(method))

    # Load the data and init seeds
    train_data, train_labels, test_data, test_labels = load_mnist()
    np.random.seed(1)
    # NOTE(review): the return value is discarded, so this call does not
    # seed anything by itself; kept to preserve the original call sequence.
    sklearn.utils.check_random_state(1)

    n_train_samples = 5000
    train_subset = train_data[:n_train_samples, :]
    train_subset_labels = train_labels[:n_train_samples]

    # Learn a new space using Isomap
    isomap = Isomap(n_components=10, n_neighbors=20)
    train_data_isomap = np.float32(isomap.fit_transform(train_subset))

    if method == 'linear-regression':
        # Use linear regression to provide baseline out-of-sample extensions
        proj = LinearRegression()
        proj.fit(np.float64(train_subset), np.float64(train_data_isomap))
        acc = evaluate_svm(proj.predict(train_subset), train_subset_labels,
                           proj.predict(test_data), test_labels)
    else:
        # Use the SEF to provide out-of-sample extensions
        proj = LinearSEF(train_data.shape[1],
                         output_dimensionality=sef_dims[method])
        proj.cuda()  # presumably moves the model to the GPU - confirm
        proj.fit(data=train_subset, target_data=train_data_isomap,
                 target='copy', epochs=50, batch_size=128, verbose=True,
                 learning_rate=0.001, regularizer_weight=1)
        acc = evaluate_svm(proj.transform(train_subset), train_subset_labels,
                           proj.transform(test_data), test_labels)

    # acc is scaled by 100 for display; presumably a fraction in [0, 1].
    print("Method: ", method, " Test accuracy: ", 100 * acc, " %")
def outofsample_extensions(method='kernel-regression'):
    """Evaluate a kernel-based out-of-sample extension of an Isomap embedding.

    A 10-component Isomap is fitted on the first 5000 training samples
    returned by ``load_mnist()``. A projection from the raw inputs to that
    embedding is then learned and scored with ``evaluate_svm``; the result
    is printed.

    Parameters
    ----------
    method : str
        ``'kernel-regression'`` for the RBF ``KernelRidge`` baseline, or
        ``'cK-ISOMAP-10d'`` / ``'cK-ISOMAP-20d'`` for a ``KernelSEF`` with
        10 or 20 output dimensions.

    Raises
    ------
    ValueError
        If ``method`` is not one of the supported names. The check runs
        before any data is loaded so a typo fails immediately instead of
        after the Isomap fit.
    """
    sef_dims = {'cK-ISOMAP-10d': 10, 'cK-ISOMAP-20d': 20}
    if method != 'kernel-regression' and method not in sef_dims:
        raise ValueError(
            "Unknown method {!r}; expected 'kernel-regression', "
            "'cK-ISOMAP-10d' or 'cK-ISOMAP-20d'".format(method))

    # Load the data and init seeds
    train_data, train_labels, test_data, test_labels = load_mnist()
    np.random.seed(1)
    # NOTE(review): the return value is discarded, so this call does not
    # seed anything by itself; kept to preserve the original call sequence.
    sklearn.utils.check_random_state(1)

    n_train_samples = 5000
    train_subset_labels = train_labels[:n_train_samples]

    # Learn a new space using Isomap
    isomap = Isomap(n_components=10, n_neighbors=20)
    train_data_isomap = np.float32(
        isomap.fit_transform(train_data[:n_train_samples, :]))

    # Kernel width derived from the training subset; only the
    # kernel-regression branch reads it.
    sigma = mean_data_distance(np.float32(train_data[:n_train_samples, :]))

    if method == 'kernel-regression':
        # Use kernel regression to provide baseline out-of-sample extensions
        proj = KernelRidge(kernel='rbf', gamma=(1.0 / sigma ** 2))
        proj.fit(np.float64(train_data[:n_train_samples, :]),
                 np.float64(train_data_isomap))
        acc = evaluate_svm(proj.predict(train_data[:n_train_samples, :]),
                           train_subset_labels,
                           proj.predict(test_data), test_labels)
    else:
        # Use the SEF to provide out-of-sample extensions
        proj = KernelSEF(train_data[:n_train_samples], train_data.shape[1],
                         output_dimensionality=sef_dims[method])
        proj.cuda()  # presumably moves the model to the GPU - confirm
        proj.fit(data=train_data[:n_train_samples, :],
                 target_data=train_data_isomap, target='copy',
                 epochs=100, batch_size=128, verbose=True,
                 learning_rate=0.00001, regularizer_weight=0.001)
        acc = evaluate_svm(proj.transform(train_data[:n_train_samples, :]),
                           train_subset_labels,
                           proj.transform(test_data), test_labels)

    # acc is scaled by 100 for display; presumably a fraction in [0, 1].
    print("Method: ", method, " Test accuracy: ", 100 * acc, " %")
def outofsample_extensions(method=None, dataset=None):
    """Evaluate an out-of-sample extension of an Isomap embedding on a dataset.

    A 10-component Isomap is fitted on the training split returned by
    ``dataset_loader(dataset, seed=1)``. A projection from the inputs to
    that embedding is then learned and scored with ``evaluate_svm``; the
    result is printed.

    Parameters
    ----------
    method : str
        ``'linear-regression'`` for the ``LinearRegression`` baseline
        (fitted on standardized inputs), or ``'c-ISOMAP-10d'`` /
        ``'c-ISOMAP-20d'`` for a ``LinearSEF`` with 10 or 20 output
        dimensions. The default ``None`` is not a valid choice.
    dataset : object
        Passed through unchanged to ``dataset_loader``.

    Raises
    ------
    ValueError
        If ``method`` is not one of the supported names (including the
        default ``None``). The check runs before any data is loaded so the
        failure is immediate instead of an unbound ``acc`` after the fit.
    """
    sef_dims = {'c-ISOMAP-10d': 10, 'c-ISOMAP-20d': 20}
    if method != 'linear-regression' and method not in sef_dims:
        raise ValueError(
            "Unknown method {!r}; expected 'linear-regression', "
            "'c-ISOMAP-10d' or 'c-ISOMAP-20d'".format(method))

    np.random.seed(1)
    # NOTE(review): the return value is discarded, so this call does not
    # seed anything by itself; kept to preserve the original call sequence.
    sklearn.utils.check_random_state(1)
    train_data, train_labels, test_data, test_labels = dataset_loader(
        dataset, seed=1)

    # Learn a new space using Isomap (on the data as loaded, before any
    # standardization applied below).
    isomap = Isomap(n_components=10, n_neighbors=20)
    train_data_isomap = np.float32(isomap.fit_transform(train_data))

    if method == 'linear-regression':
        from sklearn.preprocessing import StandardScaler
        std = StandardScaler()
        train_data = std.fit_transform(train_data)
        test_data = std.transform(test_data)

        # Use linear regression to provide baseline out-of-sample extensions
        proj = LinearRegression()
        proj.fit(np.float64(train_data), np.float64(train_data_isomap))
        acc = evaluate_svm(proj.predict(train_data), train_labels,
                           proj.predict(test_data), test_labels)
    else:
        # Use the SEF to provide out-of-sample extensions
        proj = LinearSEF(train_data.shape[1],
                         output_dimensionality=sef_dims[method])
        proj.cuda()  # presumably moves the model to the GPU - confirm
        proj.fit(data=train_data, target_data=train_data_isomap,
                 target='copy', epochs=50, batch_size=1024, verbose=False,
                 learning_rate=0.001, regularizer_weight=1)
        acc = evaluate_svm(proj.transform(train_data), train_labels,
                           proj.transform(test_data), test_labels)

    # acc is scaled by 100 for display; presumably a fraction in [0, 1].
    print("Method: ", method, " Test accuracy: ", 100 * acc, " %")
def outofsample_extensions(method=None, dataset=None):
    """Evaluate a kernel-based out-of-sample Isomap extension on a dataset.

    A 10-component Isomap is fitted on the training split returned by
    ``dataset_loader(dataset, seed=1)``. A projection from the inputs to
    that embedding is then learned and scored with ``evaluate_svm``; the
    result is printed.

    Parameters
    ----------
    method : str
        ``'kernel-regression'`` for the RBF ``KernelRidge`` baseline, or
        ``'cK-ISOMAP-10d'`` / ``'cK-ISOMAP-20d'`` for a ``KernelSEF`` with
        10 or 20 output dimensions. The default ``None`` is not a valid
        choice.
    dataset : object
        Passed through unchanged to ``dataset_loader``.

    Raises
    ------
    ValueError
        If ``method`` is not one of the supported names (including the
        default ``None``). The check runs before any data is loaded so the
        failure is immediate instead of an unbound ``acc`` after the fit.
    """
    sef_dims = {'cK-ISOMAP-10d': 10, 'cK-ISOMAP-20d': 20}
    if method != 'kernel-regression' and method not in sef_dims:
        raise ValueError(
            "Unknown method {!r}; expected 'kernel-regression', "
            "'cK-ISOMAP-10d' or 'cK-ISOMAP-20d'".format(method))

    np.random.seed(1)
    # NOTE(review): the return value is discarded, so this call does not
    # seed anything by itself; kept to preserve the original call sequence.
    sklearn.utils.check_random_state(1)
    train_data, train_labels, test_data, test_labels = dataset_loader(
        dataset, seed=1)

    # Learn a new space using Isomap
    isomap = Isomap(n_components=10, n_neighbors=20)
    train_data_isomap = np.float32(isomap.fit_transform(train_data))

    # Kernel width derived from the training data; only the
    # kernel-regression branch reads it.
    sigma = mean_data_distance(np.float32(train_data))

    if method == 'kernel-regression':
        # Use kernel regression to provide baseline out-of-sample extensions
        proj = KernelRidge(kernel='rbf', gamma=(1.0 / sigma ** 2))
        proj.fit(np.float64(train_data), np.float64(train_data_isomap))
        acc = evaluate_svm(proj.predict(train_data), train_labels,
                           proj.predict(test_data), test_labels)
    else:
        # Use the SEF to provide out-of-sample extensions
        proj = KernelSEF(train_data, train_data.shape[1],
                         output_dimensionality=sef_dims[method])
        proj.cuda()  # presumably moves the model to the GPU - confirm
        proj.fit(data=train_data, target_data=train_data_isomap,
                 target='copy', epochs=100, batch_size=256, verbose=True,
                 learning_rate=0.00001, regularizer_weight=0.001)
        acc = evaluate_svm(proj.transform(train_data), train_labels,
                           proj.transform(test_data), test_labels)

    # acc is scaled by 100 for display; presumably a fraction in [0, 1].
    print("Method: ", method, " Test accuracy: ", 100 * acc, " %")
def do_embedding(self, event=None):
    """Fit the embedding method chosen in ``self.method`` and plot the result.

    The data comes from ``self.parent.converted``; when that is ``None`` it
    is loaded from ``converted.npy`` in the parent's output folder. After
    fitting, the embedding is stored on ``self.embed`` / ``self.embed_plot``
    and the histogram and embedding plots are regenerated. ``event`` is not
    used.
    """
    frames = self.parent.converted
    if frames is None:
        # FIXME For debugging: loads a cached array instead of running
        # self.conversion.convert_frames().
        frames = np.load(self.parent.output_folder+'/converted.npy')
        self.parent.converted = frames

    choice = self.method.currentIndex()
    print('Doing %s' % self.method.currentText())

    # Combo-box index -> lazily evaluated constructor, so only the
    # selected estimator is ever built.
    builders = {
        0: lambda: manifold.SpectralEmbedding(n_components=4, n_jobs=-1),
        1: lambda: manifold.Isomap(n_components=4, n_jobs=-1),
        2: lambda: manifold.LocallyLinearEmbedding(
            n_components=4, n_jobs=-1, n_neighbors=20, method='modified'),
        3: lambda: manifold.LocallyLinearEmbedding(
            n_components=4, n_jobs=-1, n_neighbors=20, method='hessian',
            eigen_solver='dense'),
        4: lambda: manifold.MDS(n_components=4, n_jobs=-1),
        5: lambda: manifold.TSNE(n_components=3, init='pca'),
    }
    build = builders.get(choice)
    if build is not None:
        # Any other index leaves self.embedder as it was.
        self.embedder = build()

    self.embedder.fit(frames)
    self.embed = self.embedder.embedding_
    self.embed_plot = self.embed
    self.gen_hist()
    self.plot_embedding()

    # The classes frame is added only after the first embedding.
    if self.embedded:
        return
    self.add_classes_frame()
    self.embedded = True
def test_Isomap(*data):
    """Print the Isomap reconstruction error for 4, 3, 2 and 1 components.

    :param data: exactly two positional values, ``(X, y)``; only ``X`` is
        used for fitting.
    :return: None
    """
    X, _ = data
    for n_dims in range(4, 0, -1):
        embedder = manifold.Isomap(n_components=n_dims)
        embedder.fit(X)
        error = embedder.reconstruction_error()
        print('reconstruction_error(n_components=%d) : %s' % (n_dims, error))
def plot_Isomap_k(*data):
    """Plot 2-D Isomap embeddings for several ``n_neighbors`` values.

    One subplot is drawn for each k in ``[1, 5, 25, y.size - 1]`` on a 2x2
    grid, with points colored by their label in ``y``.

    :param data: exactly two positional values, ``(X, y)``
    :return: None
    """
    X, y = data
    neighbor_counts = [1, 5, 25, y.size - 1]
    # One color per label; zip() below stops at the shorter sequence, so
    # labels beyond the tenth are not drawn.
    palette = (
        (1, 0, 0), (0, 1, 0), (0, 0, 1), (0.5, 0.5, 0), (0, 0.5, 0.5),
        (0.5, 0, 0.5), (0.4, 0.6, 0), (0.6, 0.4, 0), (0, 0.6, 0.4),
        (0.5, 0.3, 0.2),
    )

    figure = plt.figure()
    for slot, k in enumerate(neighbor_counts, start=1):
        reducer = manifold.Isomap(n_components=2, n_neighbors=k)
        embedding = reducer.fit_transform(X)
        axes = figure.add_subplot(2, 2, slot)
        for label, color in zip(np.unique(y), palette):
            mask = y == label
            axes.scatter(
                embedding[mask, 0],
                embedding[mask, 1],
                label="target= {0}".format(label),
                color=color,
            )
        axes.set_xlabel("X[0]")
        axes.set_ylabel("X[1]")
        axes.legend(loc="best")
        axes.set_title("k={0}".format(k))

    plt.suptitle("Isomap")
    plt.show()
def plot_Isomap_k_d1(*data):
    """Plot 1-D Isomap embeddings for several ``n_neighbors`` values.

    One subplot is drawn for each k in ``[1, 5, 25, y.size - 1]`` on a 2x2
    grid. The single embedded coordinate goes on the horizontal axis and
    every point is placed at height zero, colored by its label in ``y``.

    :param data: exactly two positional values, ``(X, y)``
    :return: None
    """
    X, y = data
    neighbor_counts = [1, 5, 25, y.size - 1]
    # One color per label; zip() below stops at the shorter sequence, so
    # labels beyond the tenth are not drawn.
    palette = (
        (1, 0, 0), (0, 1, 0), (0, 0, 1), (0.5, 0.5, 0), (0, 0.5, 0.5),
        (0.5, 0, 0.5), (0.4, 0.6, 0), (0.6, 0.4, 0), (0, 0.6, 0.4),
        (0.5, 0.3, 0.2),
    )

    figure = plt.figure()
    for slot, k in enumerate(neighbor_counts, start=1):
        reducer = manifold.Isomap(n_components=1, n_neighbors=k)
        embedding = reducer.fit_transform(X)
        axes = figure.add_subplot(2, 2, slot)
        for label, color in zip(np.unique(y), palette):
            mask = y == label
            selected = embedding[mask]
            axes.scatter(
                selected,
                np.zeros_like(selected),
                label="target= {0}".format(label),
                color=color,
            )
        axes.set_xlabel("X")
        axes.set_ylabel("Y")
        axes.legend(loc="best")
        axes.set_title("k={0}".format(k))

    plt.suptitle("Isomap")
    plt.show()
def test_isomap_reconstruction_error():
    """Check ``Isomap.reconstruction_error`` against a direct computation.

    Uses a 5x5 integer lattice with a noisy third coordinate. The centered
    kernels ``-0.5 * D ** 2`` of the input and of the 2-component embedding
    are compared, and the scaled norm of their difference must match the
    value reported by the estimator.
    """
    side = 5
    n_points = side ** 2
    k = n_points - 1

    # Regular lattice in 2D ...
    lattice = np.array(list(product(range(side), repeat=2)))

    # ... plus a small random offset along a third axis.
    rng = np.random.RandomState(0)
    offset = 0.1 * rng.randn(n_points, 1)
    points = np.concatenate((lattice, offset), 1)

    # Input kernel.
    centerer = preprocessing.KernelCenterer()
    dist_in = neighbors.kneighbors_graph(points, k, mode='distance').toarray()
    kernel_in = centerer.fit_transform(-0.5 * dist_in ** 2)

    # Same iteration order as nesting path_methods inside eigen_solvers.
    for eigen_solver, path_method in product(eigen_solvers, path_methods):
        model = manifold.Isomap(
            n_neighbors=k,
            n_components=2,
            eigen_solver=eigen_solver,
            path_method=path_method,
        )
        model.fit(points)

        # Output kernel.
        dist_out = neighbors.kneighbors_graph(
            model.embedding_, k, mode='distance'
        ).toarray()
        kernel_out = centerer.fit_transform(-0.5 * dist_out ** 2)

        # The directly computed error must agree with the estimator's.
        expected_error = np.linalg.norm(kernel_in - kernel_out) / n_points
        assert_almost_equal(expected_error, model.reconstruction_error())
def test_pipeline():
    """Check that Isomap can be used as a transformer step in a Pipeline.

    Fits Isomap followed by a k-nearest-neighbors classifier on blob data
    and requires a training score above 0.9.
    """
    # TODO check that it actually does something useful
    X, y = datasets.make_blobs(random_state=0)
    steps = [
        ('isomap', manifold.Isomap()),
        ('clf', neighbors.KNeighborsClassifier()),
    ]
    pipe = pipeline.Pipeline(steps)
    pipe.fit(X, y)
    training_score = pipe.score(X, y)
    assert_less(.9, training_score)
def test_isomap_clone_bug():
    """Regression test for the bug reported in #6062.

    After ``set_params(n_neighbors=...)`` and a refit, the fitted
    ``nbrs_`` attribute must report the updated neighbor count.
    """
    estimator = manifold.Isomap()
    for k in (10, 15, 20):
        estimator.set_params(n_neighbors=k)
        estimator.fit(np.random.rand(50, 2))
        assert_equal(estimator.nbrs_.n_neighbors, k)
def init_UI(self):
    """Build the embedding panel's widgets and wire up their signals.

    Layout, top to bottom: a combo box listing the embedding methods, a
    row with the 'Embed' button and the 'Draw ROI' checkbox, and a row with
    the X-axis / Y-axis index fields.
    """
    self.vbox = QtWidgets.QVBoxLayout(self)

    # Method selector; the item order defines the combo-box indices.
    self.method = QtWidgets.QComboBox(self)
    self.vbox.addWidget(self.method)
    for method_name in (
        'Spectral Embedding',
        'Isomap',
        'Modified LLE',
        'Hessian LLE',
        'Multi-dimensional Scaling',
        't-Stochastic Neighbor Embedding',
    ):
        self.method.addItem(method_name)

    # Row 1: embed button and ROI toggle.
    action_row = QtWidgets.QHBoxLayout()
    self.vbox.addLayout(action_row)
    embed_button = QtWidgets.QPushButton('Embed', self)
    embed_button.clicked.connect(self.do_embedding)
    action_row.addWidget(embed_button)
    self.track_flag = QtWidgets.QCheckBox('Draw ROI', self)
    self.track_flag.setChecked(False)
    self.track_flag.stateChanged.connect(self.track_flag_changed)
    action_row.addWidget(self.track_flag)
    action_row.addStretch(1)

    # Row 2: axis fields; finishing an edit triggers gen_hist.
    axis_row = QtWidgets.QHBoxLayout()
    self.vbox.addLayout(axis_row)

    x_caption = QtWidgets.QLabel('X-axis:', self)
    axis_row.addWidget(x_caption)
    self.x_axis_num = QtWidgets.QLineEdit('0', self)
    self.x_axis_num.setFixedWidth(24)
    self.x_axis_num.editingFinished.connect(self.gen_hist)
    axis_row.addWidget(self.x_axis_num)

    y_caption = QtWidgets.QLabel('Y-axis:', self)
    axis_row.addWidget(y_caption)
    self.y_axis_num = QtWidgets.QLineEdit('1', self)
    self.y_axis_num.setFixedWidth(24)
    self.y_axis_num.editingFinished.connect(self.gen_hist)
    axis_row.addWidget(self.y_axis_num)
    axis_row.addStretch(1)

    self.vbox.addStretch(1)
def create_isomap_features(features, model):
    r"""Create Isomap features.

    Parameters
    ----------
    features : numpy array
        The input features.
    model : alphapy.Model
        The model object with the Isomap parameters. The keys
        ``iso_components``, ``iso_neighbors`` and ``n_jobs`` are read from
        ``model.specs``.

    Returns
    -------
    ifeatures : numpy array
        The Isomap features.

    Notes
    -----
    Isomaps are very memory-intensive. Your process will be killed
    if you run out of memory.

    References
    ----------
    You can find more information on Isomap here [ISO]_.

    .. [ISO] http://scikit-learn.org/stable/modules/manifold.html#isomap

    """

    logger.info("Creating Isomap Features")

    # Extract model parameters
    iso_components = model.specs['iso_components']
    iso_neighbors = model.specs['iso_neighbors']
    n_jobs = model.specs['n_jobs']

    # Log model parameters
    logger.info("Isomap Components : %d", iso_components)
    logger.info("Isomap Neighbors  : %d", iso_neighbors)

    # Generate Isomap features. A separate local name is used for the
    # estimator so the ``model`` argument is not rebound.
    isomap = Isomap(n_neighbors=iso_neighbors, n_components=iso_components,
                    n_jobs=n_jobs)
    ifeatures = isomap.fit_transform(features)

    # Return new Isomap features
    logger.info("Isomap Feature Count : %d", ifeatures.shape[1])
    return ifeatures
#
# Function create_tsne_features
#