def test_rfe_features_importance():
    generator = check_random_state(0)
    iris = load_iris()
    X = np.c_[iris.data, generator.normal(size=(len(iris.data), 6))]
    y = iris.target

    clf = RandomForestClassifier(n_estimators=20,
                                 random_state=generator, max_depth=2)
    rfe = RFE(estimator=clf, n_features_to_select=4, step=0.1)
    rfe.fit(X, y)
    assert_equal(len(rfe.ranking_), X.shape[1])

    clf_svc = SVC(kernel="linear")
    rfe_svc = RFE(estimator=clf_svc, n_features_to_select=4, step=0.1)
    rfe_svc.fit(X, y)

    # Check if the supports are equal
    assert_array_equal(rfe.get_support(), rfe_svc.get_support())
def test_graph_lasso_iris():
    # Hard-coded solution from R glasso package for alpha=1.0
    # The iris datasets in R and sklearn do not match in a few places; these
    # values are for the sklearn version.
    cov_R = np.array([
        [0.68112222, 0.0, 0.2651911, 0.02467558],
        [0.00, 0.1867507, 0.0, 0.00],
        [0.26519111, 0.0, 3.0924249, 0.28774489],
        [0.02467558, 0.0, 0.2877449, 0.57853156]
    ])
    icov_R = np.array([
        [1.5188780, 0.0, -0.1302515, 0.0],
        [0.0, 5.354733, 0.0, 0.0],
        [-0.1302515, 0.0, 0.3502322, -0.1686399],
        [0.0, 0.0, -0.1686399, 1.8123908]
    ])
    X = datasets.load_iris().data
    emp_cov = empirical_covariance(X)
    for method in ('cd', 'lars'):
        cov, icov = graph_lasso(emp_cov, alpha=1.0, return_costs=False,
                                mode=method)
        assert_array_almost_equal(cov, cov_R)
        assert_array_almost_equal(icov, icov_R)
def test_graph_lasso_iris_singular():
    # Small subset of rows to test the rank-deficient case.
    # Need to choose samples such that none of the variances are zero.
    indices = np.arange(10, 13)

    # Hard-coded solution from R glasso package for alpha=0.01
    cov_R = np.array([
        [0.08, 0.056666662595, 0.00229729713223, 0.00153153142149],
        [0.056666662595, 0.082222222222, 0.00333333333333, 0.00222222222222],
        [0.002297297132, 0.003333333333, 0.00666666666667, 0.00009009009009],
        [0.001531531421, 0.002222222222, 0.00009009009009, 0.00222222222222]
    ])
    icov_R = np.array([
        [24.42244057, -16.831679593, 0.0, 0.0],
        [-16.83168201, 24.351841681, -6.206896552, -12.5],
        [0.0, -6.206896171, 153.103448276, 0.0],
        [0.0, -12.499999143, 0.0, 462.5]
    ])
    X = datasets.load_iris().data[indices, :]
    emp_cov = empirical_covariance(X)
    for method in ('cd', 'lars'):
        cov, icov = graph_lasso(emp_cov, alpha=0.01, return_costs=False,
                                mode=method)
        assert_array_almost_equal(cov, cov_R, decimal=5)
        assert_array_almost_equal(icov, icov_R, decimal=5)
# Source: grid_search_test.py (project: DeepLearning_VirtualReality_BigData_Project, author: rashmitripathi)
def testIrisDNN(self):
    if HAS_SKLEARN:
        random.seed(42)
        iris = datasets.load_iris()
        feature_columns = learn.infer_real_valued_columns_from_input(iris.data)
        classifier = learn.DNNClassifier(
            feature_columns=feature_columns,
            hidden_units=[10, 20, 10],
            n_classes=3)
        grid_search = GridSearchCV(
            classifier, {'hidden_units': [[5, 5], [10, 10]]},
            scoring='accuracy',
            fit_params={'steps': [50]})
        grid_search.fit(iris.data, iris.target)
        score = accuracy_score(iris.target, grid_search.predict(iris.data))
        self.assertGreater(score, 0.5, 'Failed with score = {0}'.format(score))
def load_iris():
    try:
        # Load Iris dataset from the sklearn.datasets package
        from sklearn import datasets
        from sklearn import decomposition

        # Load Dataset
        iris = datasets.load_iris()
        X = iris.data
        y = iris.target
        labels = iris.target_names

        # Reduce components by Principal Component Analysis from sklearn
        X = decomposition.PCA(n_components=3).fit_transform(X)
    except ImportError:
        # Load Iris dataset manually
        path = os.path.join('data', 'iris', 'iris.data')
        iris_data = np.genfromtxt(path, dtype='str', delimiter=',')
        X = iris_data[:, :4].astype(dtype=float)
        y = np.ndarray((X.shape[0],), dtype=int)

        # Create target vector y and corresponding labels
        labels, idx = [], 0
        for i, label in enumerate(iris_data[:, 4]):
            label = label.split('-')[1]
            if label not in labels:
                labels.append(label)
                idx += 1
            y[i] = idx - 1

        # Reduce components by implemented Principal Component Analysis
        X = PCA(X, 3)[0]

    return X, y, labels
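A hypothetical consumer of the loader above (not part of the original snippet): it plots the three retained components coloured by class, assuming matplotlib is available.

def _example_plot_iris_components():
    # Hypothetical usage sketch: scatter the three PCA components returned
    # by load_iris(), coloured by integer class label.
    import matplotlib.pyplot as plt
    from mpl_toolkits.mplot3d import Axes3D  # noqa: F401  registers the '3d' projection

    X, y, labels = load_iris()
    fig = plt.figure()
    ax = fig.add_subplot(111, projection='3d')
    ax.scatter(X[:, 0], X[:, 1], X[:, 2], c=y)
    ax.set_title("Iris after PCA (3 components)")
    plt.show()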
def datablock():
    X, y = load_iris(return_X_y=True)
    df = pd.DataFrame(X, columns=['var%d' % i for i in range(4)])
    df['target'] = y
    # Make one variable categorical
    df['var3'] = df['var3'].apply(lambda x: int(x)).astype(object)
    # Make the outcome binary (index with .loc directly to avoid chained assignment)
    df.loc[df['target'] == 2, 'target'] = 1
    return DataBlock(df, df, df, 'target')
def make_cl_dataset_and_field_manager(self):
    iris = datasets.load_iris()
    dataset = DataSet(iris.data, iris.target, iris.feature_names, iris.target_names)
    feature_fields = []
    for i, name in enumerate(dataset.feature_names):
        f = Field(name, "NUMBER",
                  value_mean=np.mean(dataset.data[:, i]),
                  value_std=np.std(dataset.data[:, i]))
        feature_fields.append(f)
    target = Field("flower kind", "DROP_DOWN",
                   value_converter={"setosa": 0, "versicolor": 1, "virginica": 2})
    field_manager = FieldManager(-1, feature_fields, target)
    return dataset, field_manager
def make_dataset_and_field_manager(self):
    iris = datasets.load_iris()
    dataset = DataSet(iris.data, iris.target, iris.feature_names, iris.target_names)
    feature_fields = []
    for i, name in enumerate(dataset.feature_names):
        f = Field(name, "NUMBER",
                  value_mean=np.mean(dataset.data[:, i]),
                  value_std=np.std(dataset.data[:, i]))
        feature_fields.append(f)
    target = Field("flower kind", "DROP_DOWN",
                   value_converter={"setosa": 0, "versicolor": 1, "virginica": 2})
    field_manager = FieldManager(-1, feature_fields, target)
    return dataset, field_manager
def load_iris_df(include_tgt=True, tgt_name="Species", shuffle=False):
    """Loads the iris dataset into a dataframe with the
    target set as the "Species" feature or whatever name
    is specified in ``tgt_name``.

    Parameters
    ----------
    include_tgt : bool, optional (default=True)
        Whether to include the target

    tgt_name : str, optional (default="Species")
        The name of the target feature

    shuffle : bool, optional (default=False)
        Whether to shuffle the rows on return

    Returns
    -------
    X : pd.DataFrame, shape=(n_samples, n_features)
        The loaded dataset
    """
    iris = load_iris()
    X = pd.DataFrame.from_records(data=iris.data, columns=iris.feature_names)

    if include_tgt:
        X[tgt_name] = iris.target

    return X if not shuffle else shuffle_dataframe(X)
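A minimal usage sketch for load_iris_df (hypothetical, not from the original project): separate the feature columns from the target column the helper appends.

def _example_load_iris_df_usage():
    # Hypothetical usage sketch for load_iris_df.
    df = load_iris_df(include_tgt=True, tgt_name="Species", shuffle=False)
    X = df.drop("Species", axis=1)   # the four iris measurements
    y = df["Species"]                # integer class codes 0, 1, 2
    return X, y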
def backward(self, dz):
    dx, dw, db = layers.linear_backward(dz, self.cache1)
    return dx, dw, db

# iris = datasets.load_iris()
# X = iris.data
# Y = iris.target
# Y = to_categorical(iris.target, 3)
def iris_softmax():
    print("Initializing net for Iris dataset classification problem. . .")
    iris = load_iris()
    X = iris.data
    Y = iris.target

    dn = DenseNet(input_dim=4, optim_config={"type": "sgd", "learning_rate": 0.05}, loss_fn='softmax')
    dn.addlayer("ReLU", 4)
    dn.addlayer("ReLU", 6)
    dn.addlayer("ReLU", 3)

    for i in range(600):
        print("Iteration: ", i)
        dn.train(X, Y)
def iris_svm():
    print("Initializing net for Iris dataset classification problem. . .")
    iris = load_iris()
    X = iris.data
    Y = iris.target

    dn = DenseNet(input_dim=4, optim_config={"type": "sgd", "learning_rate": 0.01}, loss_fn='svm')
    dn.addlayer("ReLU", 4)
    dn.addlayer("ReLU", 6)
    dn.addlayer("ReLU", 3)

    for i in range(1000):
        print("Iteration: ", i)
        dn.train(X, Y)
# def iris_svm_momentum():
# print("Initializing net for Iris dataset classification problem. . .")
# iris = load_iris()
# X = iris.data
# Y = iris.target
#
# dn = DenseNet(input_dim=4, optim_config={"type": "momentum", "learning_rate": 0.01, "momentum":0.5}, loss_fn='svm')
# dn.addlayer("ReLU", 4)
# dn.addlayer("ReLU", 6)
# dn.addlayer("ReLU", 3)
#
# for i in range(1000):
# print("Iteration: ", i)
# dn.train(X, Y)
#two_bit_xor_sigmoid()
def _check_iris_imputation(_impute_fn):
    iris = load_iris()
    X = iris.data

    # some values missing only
    rng = np.random.RandomState(0)
    X_some_missing = X.copy()
    mask = np.abs(X[:, 2] - rng.normal(loc=5.5, scale=.7, size=X.shape[0])) < .6
    X_some_missing[mask, 3] = np.NaN

    X_imputed = _impute_fn(X_some_missing, np.isnan(X_some_missing), k=3)
    mean_abs_diff = np.mean(np.abs(X - X_imputed))
    print(mean_abs_diff)
    assert mean_abs_diff < 0.05, "Difference too big: %0.4f" % mean_abs_diff
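A column-mean imputer matching the _impute_fn(X, mask, k) signature the checker above expects (a hedged sketch, not the project's actual imputation routine; `k` is accepted but unused here, whereas the original presumably implements a k-nearest-neighbours fill).

def _mean_impute(X, missing_mask, k=3):
    # Replace each missing entry with the mean of its column, computed over
    # the observed (non-missing) values only. `k` is ignored in this sketch.
    X_imputed = X.copy()
    col_means = np.nanmean(np.where(missing_mask, np.nan, X), axis=0)
    rows, cols = np.where(missing_mask)
    X_imputed[rows, cols] = col_means[cols]
    return X_imputed

# e.g. _check_iris_imputation(_mean_impute)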
def lession_4():
    iris = datasets.load_iris()
    iris_X = iris.data
    iris_y = iris.target
    # print(iris_X[:2])
    # print(iris_y)
    X_train, X_test, y_train, y_test = train_test_split(iris_X, iris_y, test_size=0.3)
    knn = KNeighborsClassifier()
    knn.fit(X_train, y_train)
    print(knn.predict(X_test))
    print(y_test)
# dataset usage
def train_model(split=.25):
    """Train a model on the iris dataset.

    This splits the iris dataset into train and test sets, trains a
    Random Forest classifier on the training set and evaluates it on
    the test set. In addition, the confusion matrix and the feature
    importances are calculated.

    Args:
        split (float): Fraction of observations in the test dataset.

    Returns:
        RandomForestClassifier: Trained model.
        pandas.DataFrame: Confusion matrix.
        list: Feature importances as (feature, importance) tuples.
        pandas.Index: Cleaned feature names.
    """
    iris = load_iris()
    all_data = pd.DataFrame(iris.data, columns=iris.feature_names)
    features = all_data.columns.str.replace(r'\s+', '_').str.replace(r'\W+', '')
    all_data['species'] = pd.Categorical.from_codes(iris.target,
                                                    iris.target_names)
    train, test = train_test_split(all_data, test_size=split)

    clf = RandomForestClassifier(n_jobs=1)
    clf.fit(train.drop('species', axis=1), train.species)
    preds = clf.predict(test.drop('species', axis=1))
    conf_matrix = pd.crosstab(test['species'], preds,
                              rownames=['Actual Species'],
                              colnames=['Predicted Species'])
    f_importances = list(zip(train.drop('species', axis=1).columns,
                             clf.feature_importances_))
    return clf, conf_matrix, f_importances, features
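A hypothetical call site for train_model (sketch only, not from the original project), printing the confusion matrix and the per-feature importances it returns.

def _example_train_model_usage():
    # Train on a 25% hold-out and inspect the returned artefacts.
    clf, conf_matrix, f_importances, features = train_model(split=0.25)
    print(conf_matrix)
    for name, importance in f_importances:
        print("%s: %.3f" % (name, importance))
    return clf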
def __init__(self):
    self.iris = datasets.load_iris()
    self.count = 3
    self.xaxis = 0
    self.yaxis = 1
def __init__(self):
    self.iris = datasets.load_iris()
def setUp(self):
    iris = datasets.load_iris()
    rng = check_random_state(0)
    perm = rng.permutation(iris.target.size)
    iris.data = iris.data[perm]
    iris.target = iris.target[perm]
    self.iris = iris
def setUp(self):
    iris = datasets.load_iris()
    rng = check_random_state(0)
    iris.data = iris.data
    iris.target = iris.target
    self.iris = iris
    for csv_file in glob.glob("*.csv"):
        os.remove(csv_file)
def main():
    # Load the dataset
    data = datasets.load_iris()
    X = normalize(data.data)
    y = data.target

    # Project the data onto the 2 primary components
    multi_class_lda = MultiClassLDA()
    multi_class_lda.plot_in_2d(X, y, title="LDA")