def make_3_circles(n_samples, random_state=1):
    """Sample labelled points from three circle datasets stacked along z.

    Three ``make_circles`` draws (``noise=0.05``, ``factor=.01``) are placed
    on the planes ``z = -1``, ``z = 0`` and ``z = 1`` and labelled 1, 2 and 3
    respectively. The ``3 * n_samples`` rows are then shuffled and only the
    first ``n_samples`` of them are returned.

    Parameters
    ----------
    n_samples : int
        Size passed to each ``make_circles`` call, and number of rows
        returned.
    random_state : default=1
        Handed to ``check_random_state``; the resulting generator is shared
        by the three draws and by the shuffle.

    Returns
    -------
    X : ndarray of shape (n_samples, 3)
        Columns 0-1 come from ``make_circles``; column 2 is the layer height.
    Y_plot : ndarray of shape (n_samples, 1)
        Layer label (1, 2 or 3) of each returned row.
    """
    rng = check_random_state(random_state)
    total = 3 * n_samples
    points = np.empty((total, 3))
    labels = np.empty((total, 1))

    # The layers are drawn in order so the shared generator is consumed in a
    # fixed sequence.
    for layer, height in enumerate((-1, 0, 1)):
        rows = slice(layer * n_samples, (layer + 1) * n_samples)
        ring_xy, _ = make_circles(n_samples=n_samples, noise=0.05,
                                  factor=.01, random_state=rng)
        points[rows, :2] = ring_xy
        points[rows, 2] = height
        labels[rows, 0] = layer + 1

    # shuffle examples, then cut to actual size
    order = rng.permutation(np.arange(total))
    keep = order[:n_samples]
    return points[keep], labels[keep]
# Example source code using Python's make_circles()
def test_random_trees_dense_equal():
    """Dense and sparse RandomTreesEmbedding outputs must be identical.

    Two embeddings that differ only in ``sparse_output`` are fitted on the
    same circles data with the same seed; the densified sparse result has to
    equal the dense result.
    """
    X, _ = datasets.make_circles(factor=0.5)

    # Identical settings apart from the output format.
    shared = dict(n_estimators=10, random_state=0)
    dense_out = RandomTreesEmbedding(sparse_output=False,
                                     **shared).fit_transform(X)
    sparse_out = RandomTreesEmbedding(sparse_output=True,
                                      **shared).fit_transform(X)

    assert_array_equal(sparse_out.toarray(), dense_out)
# Ignore warnings from switching to more power iterations in randomized_svd
def test_random_hasher():
    """Random-forest hashing should make the circles data linearly separable.

    The embedding is checked for fit/transform consistency and for having
    exactly one active leaf per sample per tree, then reduced to two SVD
    components on which a linear SVM must reach a perfect training score.

    Note: Not all random_states produce perfect results.
    """
    n_trees = 30
    X, y = datasets.make_circles(factor=0.5)
    embedded = RandomTreesEmbedding(n_estimators=n_trees,
                                    random_state=1).fit_transform(X)

    # fit followed by transform must agree with fit_transform
    hasher = RandomTreesEmbedding(n_estimators=n_trees, random_state=1)
    refit = hasher.fit(X).transform(X)
    assert_array_equal(refit.toarray(), embedded.toarray())

    # one leaf active per data point per forest
    assert_equal(embedded.shape[0], X.shape[0])
    assert_array_equal(embedded.sum(axis=1), hasher.n_estimators)

    # still separable after projecting to two SVD dimensions
    reduced = TruncatedSVD(n_components=2).fit_transform(embedded)
    classifier = LinearSVC()
    classifier.fit(reduced, y)
    assert_equal(classifier.score(reduced, y), 1.)
def circles():
    """Return the ``(X, y)`` pair from ``datasets.make_circles()`` defaults."""
    X, y = datasets.make_circles()
    return X, y
def circles(n_samples=200, factor=0.5, noise=None, regular=True,
            random_state=0):
    """Generate two concentric circles of 2-D points with class labels.

    Parameters
    ----------
    n_samples : int, default=200
        Total number of points. In the regular layout the outer circle gets
        ``n_samples // 2`` points and the inner circle gets the remainder, so
        odd values yield exactly ``n_samples`` rows.
    factor : float, default=0.5
        Radius of the inner circle relative to the unit-radius outer circle.
    noise : float or None, default=None
        Scale of the Gaussian noise added to every coordinate; ``None`` adds
        no noise.
    regular : bool, default=True
        When true, the points are evenly spaced on each circle and left
        unshuffled (outer circle first). Otherwise the call is delegated to
        ``sk_datasets.make_circles`` with ``shuffle=False``.
    random_state : default=0
        Seed or generator. In the regular layout it is passed to
        ``check_random_state`` and only consulted when ``noise`` is given.

    Returns
    -------
    X : ndarray of shape (n_samples, 2)
        Point coordinates.
    gt : ndarray of shape (n_samples,)
        Labels: 0 for the outer circle, 1 for the inner circle (regular
        layout); whatever ``sk_datasets.make_circles`` returns otherwise.

    Raises
    ------
    ValueError
        If ``regular`` is true and ``factor`` lies outside ``[0, 1]``.
    """
    if not regular:
        return sk_datasets.make_circles(n_samples=n_samples,
                                        shuffle=False, noise=noise,
                                        random_state=random_state,
                                        factor=factor)

    if factor > 1 or factor < 0:
        raise ValueError("'factor' has to be between 0 and 1.")

    # Give the leftover point of an odd n_samples to the inner circle instead
    # of silently dropping it.
    n_outer = n_samples // 2
    n_inner = n_samples - n_outer

    # endpoint=False keeps the first and last point of a circle distinct.
    outer_angles = np.linspace(0, 2 * np.pi, n_outer, endpoint=False)
    inner_angles = np.linspace(0, 2 * np.pi, n_inner, endpoint=False)

    X = np.vstack((
        np.hstack((np.cos(outer_angles), np.cos(inner_angles) * factor)),
        np.hstack((np.sin(outer_angles), np.sin(inner_angles) * factor)),
    )).T
    gt = np.hstack([np.zeros(n_outer, dtype=np.intp),
                    np.ones(n_inner, dtype=np.intp)])

    if noise is not None:
        # The generator is only needed when noise is actually requested.
        generator = check_random_state(random_state)
        X += generator.normal(scale=noise, size=X.shape)
    return X, gt
def test_random_trees_dense_type():
    """``sparse_output=False`` must make RandomTreesEmbedding return an ndarray."""
    X, _ = datasets.make_circles(factor=0.5)

    dense_hasher = RandomTreesEmbedding(n_estimators=10, sparse_output=False)
    embedded = dense_hasher.fit_transform(X)

    # Exact type check: ndarray, not scipy.sparse.csr.csr_matrix
    assert_equal(type(embedded), np.ndarray)
def test_gridsearch_pipeline():
    """Grid-search the RBF ``gamma`` of a KernelPCA + Perceptron pipeline.

    The best cross-validated score on the noisy circles data has to be 1.
    """
    X, y = make_circles(n_samples=400, factor=.3, noise=.05,
                        random_state=0)

    steps = [("kernel_pca", KernelPCA(kernel="rbf", n_components=2)),
             ("Perceptron", Perceptron())]
    gammas = 2. ** np.arange(-2, 2)

    search = GridSearchCV(Pipeline(steps), cv=3,
                          param_grid={"kernel_pca__gamma": gammas})
    search.fit(X, y)
    assert_equal(search.best_score_, 1)
def test_gridsearch_pipeline_precomputed():
    """Grid-search a KernelPCA + Perceptron pipeline on a precomputed kernel.

    The RBF Gram matrix of the noisy circles data is computed up front and
    the search runs over the Perceptron's ``n_iter``; the best
    cross-validated score has to be 1.
    """
    X, y = make_circles(n_samples=400, factor=.3, noise=.05,
                        random_state=0)

    steps = [("kernel_pca", KernelPCA(kernel="precomputed", n_components=2)),
             ("Perceptron", Perceptron())]
    search = GridSearchCV(Pipeline(steps), cv=3,
                          param_grid={"Perceptron__n_iter": np.arange(1, 5)})

    gram = rbf_kernel(X, gamma=2.)
    search.fit(gram, y)
    assert_equal(search.best_score_, 1)
def makeSimpleDatasets(n_samples=1500):  # from sklearn example
    """Build four 2-D toy datasets for comparing clustering algorithms.

    Parameters
    ----------
    n_samples : int, default=1500
        Number of points in every dataset. The default is big enough to see
        the scalability of the algorithms, but not too big to avoid too long
        running times.

    Returns
    -------
    list
        ``[noisy_circles, noisy_moons, blobs, no_structure]``. The first
        three are the return values of the corresponding ``datasets.make_*``
        calls; ``no_structure`` is ``(uniform_points, None)``.

    Notes
    -----
    Reseeds the global NumPy generator with ``np.random.seed(0)``.
    """
    np.random.seed(0)
    # The requested n_samples is used as given; it used to be overwritten
    # with a hard-coded 1500 here.
    noisy_circles = datasets.make_circles(n_samples=n_samples, factor=.5,
                                          noise=.05)
    noisy_moons = datasets.make_moons(n_samples=n_samples, noise=.05)
    blobs = datasets.make_blobs(n_samples=n_samples, random_state=8)
    no_structure = np.random.rand(n_samples, 2), None
    return [noisy_circles, noisy_moons, blobs, no_structure]
def classification(dataset=0):
    """Fit a SupervisedNNModel on a 2-D toy problem and plot its predictions.

    Parameters
    ----------
    dataset : int, default=0
        Which problem to generate: 0 for ``make_classification`` with a
        uniform offset added, 1 for ``make_moons``, 2 for ``make_circles``.
        Any other value prints ``"dataset unknown"`` and returns ``None``.

    The function prints the model's test score and draws a filled contour of
    column 1 of ``model.predict`` over a grid, with training and test points
    on top. It does not call ``plt.show()``.
    """
    train_size = 1000
    test_size = 50

    # generate training and test data
    if dataset == 0:
        problem = dict(n_features=2, n_redundant=0, n_informative=2,
                       random_state=1, n_clusters_per_class=1)
        train_x, train_y = make_classification(n_samples=train_size, **problem)
        test_x, test_y = make_classification(n_samples=test_size, **problem)
        # One generator offsets both sets: training data first, then test.
        offset_rng = np.random.RandomState(2)
        train_x += 2 * offset_rng.uniform(size=train_x.shape)
        test_x += 2 * offset_rng.uniform(size=test_x.shape)
    elif dataset == 1:
        train_x, train_y = make_moons(n_samples=train_size, noise=0.3,
                                      random_state=0)
        test_x, test_y = make_moons(n_samples=test_size, noise=0.3,
                                    random_state=1)
    elif dataset == 2:
        train_x, train_y = make_circles(n_samples=train_size, noise=0.2,
                                        factor=0.5, random_state=1)
        test_x, test_y = make_circles(n_samples=test_size, noise=0.2,
                                      factor=0.5, random_state=1)
    else:
        print("dataset unknown")
        return

    # build, train, and test the model
    model = SupervisedNNModel(
        train_x.shape[1], 2,
        hunits=[100, 50],
        activations=[T.tanh, T.tanh, T.nnet.softmax],
        cost_fun='negative_log_likelihood',
        error_fun='zero_one_loss',
        learning_rate=0.01, L1_reg=0., L2_reg=0.)
    model.fit(train_x, train_y)
    print("Test Error: %f" % model.score(test_x, test_y))

    # plot dataset + predictions
    plt.figure()
    margin = .5
    step = 0.02
    x_lo, x_hi = train_x[:, 0].min() - margin, train_x[:, 0].max() + margin
    y_lo, y_hi = train_x[:, 1].min() - margin, train_x[:, 1].max() + margin
    grid_x, grid_y = np.meshgrid(np.arange(x_lo, x_hi, step),
                                 np.arange(y_lo, y_hi, step))
    point_colors = ListedColormap(['#FF0000', '#0000FF'])

    # Evaluate the model on every grid node and keep column 1 of the output
    # (presumably the score of the second class; verify against the model).
    grid_nodes = np.c_[grid_x.ravel(), grid_y.ravel()]
    surface = model.predict(grid_nodes)[:, 1].reshape(grid_x.shape)
    plt.contourf(grid_x, grid_y, surface, cmap=plt.cm.RdBu, alpha=.8)

    # training points (semi-transparent) and testing points on top
    plt.scatter(train_x[:, 0], train_x[:, 1], c=train_y, cmap=point_colors,
                alpha=0.6)
    plt.scatter(test_x[:, 0], test_x[:, 1], c=test_y, cmap=point_colors)

    plt.xlim(grid_x.min(), grid_x.max())
    plt.ylim(grid_y.min(), grid_y.max())
    plt.xticks(())
    plt.yticks(())
    plt.title('Classification Problem (%i)' % dataset)
def main():
    """Command-line entry point: train a model on a synthetic 2-D dataset.

    Parses the options ``--save``, ``--nEpoch``, ``--seed``, ``--model``,
    ``--dataset`` and ``--noncvx``, seeds ``npr`` and TensorFlow, recreates
    an empty ``<save>/<model>.<dataset>`` directory, generates the selected
    dataset, and trains a ``Model`` inside a TensorFlow session.

    Raises
    ------
    ValueError
        If ``args.dataset`` is not one of the supported names. argparse's
        ``choices`` normally rejects such values first.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--save', type=str, default='work')
    parser.add_argument('--nEpoch', type=int, default=100)
    parser.add_argument('--seed', type=int, default=42)
    parser.add_argument('--model', type=str, default="picnn",
                        choices=['picnn', 'ficnn'])
    parser.add_argument('--dataset', type=str, default="moons",
                        choices=['moons', 'circles', 'linear'])
    parser.add_argument('--noncvx', action='store_true')
    args = parser.parse_args()

    npr.seed(args.seed)
    tf.set_random_seed(args.seed)

    setproctitle.setproctitle(
        'bamos.icnn.synthetic.{}.{}'.format(args.model, args.dataset))

    # Start from an empty output directory: any previous run is deleted.
    save = os.path.join(os.path.expanduser(args.save),
                        "{}.{}".format(args.model, args.dataset))
    if os.path.isdir(save):
        shutil.rmtree(save)
    os.makedirs(save, exist_ok=True)

    if args.dataset == "moons":
        (dataX, dataY) = make_moons(noise=0.3, random_state=0)
    elif args.dataset == "circles":
        (dataX, dataY) = make_circles(noise=0.2, factor=0.5, random_state=0)
        # Swap the two class labels (0 <-> 1).
        dataY = 1. - dataY
    elif args.dataset == "linear":
        (dataX, dataY) = make_classification(
            n_features=2, n_redundant=0, n_informative=2,
            random_state=1, n_clusters_per_class=1)
        rng = np.random.RandomState(2)
        dataX += 2 * rng.uniform(size=dataX.shape)
    else:
        # An explicit error instead of assert, which is stripped under -O.
        raise ValueError("unknown dataset: {!r}".format(args.dataset))

    # Labels become a float32 column vector.
    dataY = dataY.reshape((-1, 1)).astype(np.float32)

    nFeatures = dataX.shape[1]
    nLabels = 1

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    with tf.Session(config=config) as sess:
        model = Model(nFeatures, nLabels, sess, args.model, nGdIter=30)
        model.train(args, dataX, dataY)