def train(self, training_set, training_target):
    clf = MLPClassifier(solver='adam', alpha=1e-5,
                        hidden_layer_sizes=(10, 6), random_state=1)
    clf.fit(training_set, training_target)
    joblib.dump(clf, 'output/MLP.pkl')
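A minimal counterpart sketch for restoring the model persisted above; `X_new` is a hypothetical 2-D feature matrix with the same columns as `training_set`:

import joblib

clf = joblib.load('output/MLP.pkl')  # restore the fitted classifier saved by train()
predictions = clf.predict(X_new)     # X_new is hypothetical, not part of the original snippet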
def getModels():
    result = []
    result.append("LinearRegression")
    result.append("BayesianRidge")
    result.append("ARDRegression")
    result.append("ElasticNet")
    result.append("HuberRegressor")
    result.append("Lasso")
    result.append("LassoLars")
    result.append("Ridge")
    result.append("SGDRegressor")
    result.append("SVR")
    result.append("MLPClassifier")
    result.append("KNeighborsClassifier")
    result.append("SVC")
    result.append("GaussianProcessClassifier")
    result.append("DecisionTreeClassifier")
    result.append("RandomForestClassifier")
    result.append("AdaBoostClassifier")
    result.append("GaussianNB")
    result.append("LogisticRegression")
    result.append("QuadraticDiscriminantAnalysis")
    return result
def test_partial_fit_classes_error():
    # Tests that passing different classes to partial_fit raises an error
    X = [[3, 2]]
    y = [0]
    clf = MLPClassifier(solver='sgd')
    clf.partial_fit(X, y, classes=[0, 1])
    assert_raises(ValueError, clf.partial_fit, X, y, classes=[1, 2])
def test_partial_fit_errors():
    # Test partial_fit error handling.
    X = [[3, 2], [1, 6]]
    y = [1, 0]
    # no classes passed
    assert_raises(ValueError,
                  MLPClassifier(solver='sgd').partial_fit,
                  X, y,
                  classes=[2])
    # lbfgs doesn't support partial_fit
    assert_false(hasattr(MLPClassifier(solver='lbfgs'), 'partial_fit'))
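For reference, a minimal sketch of the partial_fit contract these tests exercise, on made-up toy data: only the stochastic solvers expose partial_fit, and every class must be declared on the first call.

import numpy as np
from sklearn.neural_network import MLPClassifier

X = np.array([[3., 2.], [1., 6.], [2., 4.], [5., 1.]])
y = np.array([1, 0, 0, 1])

clf = MLPClassifier(solver='sgd', random_state=1)
clf.partial_fit(X[:2], y[:2], classes=[0, 1])  # declare all classes up front
clf.partial_fit(X[2:], y[2:])                  # later calls reuse the declared classes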
def test_params_errors():
    # Test that invalid parameters raise a ValueError
    X = [[3, 2], [1, 6]]
    y = [1, 0]
    clf = MLPClassifier  # the class itself; instantiated with bad params below
    assert_raises(ValueError, clf(hidden_layer_sizes=-1).fit, X, y)
    assert_raises(ValueError, clf(max_iter=-1).fit, X, y)
    assert_raises(ValueError, clf(shuffle='true').fit, X, y)
    assert_raises(ValueError, clf(alpha=-1).fit, X, y)
    assert_raises(ValueError, clf(learning_rate_init=-1).fit, X, y)
    assert_raises(ValueError, clf(solver='hadoken').fit, X, y)
    assert_raises(ValueError, clf(learning_rate='converge').fit, X, y)
    assert_raises(ValueError, clf(activation='cloak').fit, X, y)
def test_sparse_matrices():
    # Test that sparse and dense input matrices output the same results.
    X = X_digits_binary[:50]
    y = y_digits_binary[:50]
    X_sparse = csr_matrix(X)
    mlp = MLPClassifier(random_state=1, hidden_layer_sizes=15)
    mlp.fit(X, y)
    pred1 = mlp.predict_proba(X)  # continuous outputs (decision_function in old dev versions)
    mlp.fit(X_sparse, y)
    pred2 = mlp.predict_proba(X_sparse)
    assert_almost_equal(pred1, pred2)
    pred1 = mlp.predict(X)
    pred2 = mlp.predict(X_sparse)
    assert_array_equal(pred1, pred2)
def test_tolerance():
    # Test tolerance.
    # It should force the solver to exit the loop when it converges.
    X = [[3, 2], [1, 6]]
    y = [1, 0]
    clf = MLPClassifier(tol=0.5, max_iter=3000, solver='sgd', verbose=10)
    clf.fit(X, y)
    assert_greater(clf.max_iter, clf.n_iter_)
def test_verbose_sgd():
    # Test verbose.
    X = [[3, 2], [1, 6]]
    y = [1, 0]
    clf = MLPClassifier(solver='sgd', max_iter=2, verbose=10,
                        hidden_layer_sizes=2)
    old_stdout = sys.stdout
    sys.stdout = output = StringIO()
    clf.fit(X, y)
    clf.partial_fit(X, y)
    sys.stdout = old_stdout
    assert 'Iteration' in output.getvalue()
def test_early_stopping():
    X = X_digits_binary[:100]
    y = y_digits_binary[:100]
    tol = 0.2
    clf = MLPClassifier(tol=tol, max_iter=3000, solver='sgd',
                        early_stopping=True)
    clf.fit(X, y)
    assert_greater(clf.max_iter, clf.n_iter_)
    valid_scores = clf.validation_scores_
    best_valid_score = clf.best_validation_score_
    assert_equal(max(valid_scores), best_valid_score)
    assert_greater(best_valid_score + tol, valid_scores[-2])
    assert_greater(best_valid_score + tol, valid_scores[-1])
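A self-contained sketch of the early-stopping attributes this test inspects, using current scikit-learn spelling (`solver` rather than the old `algorithm`) and the bundled digits data:

from sklearn.datasets import load_digits
from sklearn.neural_network import MLPClassifier

X, y = load_digits(return_X_y=True)
clf = MLPClassifier(solver='sgd', early_stopping=True, validation_fraction=0.1,
                    max_iter=300, random_state=1)
clf.fit(X, y)
# validation_scores_ and best_validation_score_ exist only when early_stopping=True
print(clf.n_iter_, clf.best_validation_score_, clf.validation_scores_[-1])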
def test_adaptive_learning_rate():
    X = [[3, 2], [1, 6]]
    y = [1, 0]
    clf = MLPClassifier(tol=0.5, max_iter=3000, solver='sgd',
                        learning_rate='adaptive', verbose=10)
    clf.fit(X, y)
    assert_greater(clf.max_iter, clf.n_iter_)
    assert_greater(1e-6, clf._optimizer.learning_rate)
def indexThreeMLP():
    x = 10 ** 7
    # fetch the historical rows used for training
    cur1 = conn.cursor()
    cur1.execute('select * from szzs_black_swan limit 2,9999999999999999;')
    result1 = cur1.fetchall()
    fv = []  # feature vectors
    for res in result1:
        a = []
        a.append(float(list(res)[3]))
        a.append(float(list(res)[5]) / x)  # scale the large-valued column down by 10**7
        fv.append(a)
    cur2 = conn.cursor()
    cur2.execute('select rise_fall_next from szzs_black_swan limit 2,9999999999999999;')
    result2 = cur2.fetchall()
    cla = []  # class labels
    for res in result2:
        cla.append(int(list(res)[0]))
    cur3 = conn.cursor()
    cur3.execute('select * from szzs_black_swan order by date desc;')
    result3 = cur3.fetchmany(1)
    test = []  # most recent row, used as the sample to predict
    for res in result3:
        test.append(float(list(res)[3]))
        test.append(float(list(res)[5]) / x)
    fv = np.array(fv)
    cla = np.array(cla)
    test = np.array(test)
    mlp = MLPClassifier(solver='lbfgs', alpha=0.0001, hidden_layer_sizes=(1000, 200), random_state=1, batch_size='auto')
    mlp.fit(fv, cla)  # train the classifier
    test = [test]  # predict expects a 2-D array
    prediction = mlp.predict(test)
    return test, prediction
def worker10(fv1, cla1, test1):
    func_name = sys._getframe().f_code.co_name
    mlp = MLPClassifier(solver='lbfgs', alpha=1e-5, hidden_layer_sizes=(1000, 200), random_state=1, batch_size='auto')
    mlp.fit(fv1, cla1)  # train the classifier
    prediction = mlp.predict(test1)
    q.put((prediction, func_name))  # report the result tagged with the worker's name
def worker11(fv2, cla2, test2):
    func_name = sys._getframe().f_code.co_name
    mlp = MLPClassifier(solver='lbfgs', alpha=1e-5, hidden_layer_sizes=(1000, 200), random_state=1, batch_size='auto')
    mlp.fit(fv2, cla2)  # train the classifier
    prediction = mlp.predict(test2)
    q.put((prediction, func_name))  # report the result tagged with the worker's name
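A plausible driver for the two workers, assuming `q` is a module-level multiprocessing.Queue inherited by the child processes and that the `fv*`/`cla*`/`test*` arrays are prepared elsewhere:

import multiprocessing

q = multiprocessing.Queue()
p1 = multiprocessing.Process(target=worker10, args=(fv1, cla1, test1))
p2 = multiprocessing.Process(target=worker11, args=(fv2, cla2, test2))
p1.start()
p2.start()
results = [q.get() for _ in range(2)]  # (prediction, worker_name) tuples, in completion order
p1.join()
p2.join()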
def train_test(train, test, res_dir="res/", disp=True, outfilename=None):
    """Compare multiple classifiers and display the best one."""
    utils.print_success("Comparison of different classifiers")
    if train is not None and test is not None:
        train_features = []
        test_features = []
        train_groundtruths = []
        test_groundtruths = []
        for elem in train:
            train_groundtruths.append(elem)
            train_features.append(train[elem])
        for elem in test:
            test_groundtruths.append(elem)
            test_features.append(test[elem])
    else:
        utils.print_error("No valid data provided.")
    res_dir = utils.create_dir(res_dir)
    classifiers = {
        # "RandomForest": RandomForestClassifier(n_estimators=5),
        "KNeighbors": KNeighborsClassifier(1),
        # "GaussianProcess": GaussianProcessClassifier(1.0 * RBF(1.0), warm_start=True),
        # "DecisionTree": DecisionTreeClassifier(max_depth=5),
        # "MLP": MLPClassifier(),
        # "AdaBoost": AdaBoostClassifier(),
        # "GaussianNB": GaussianNB(),
        # "QDA": QuadraticDiscriminantAnalysis(),
        # "SVM": SVC(kernel="linear", C=0.025),
        # "GradientBoosting": GradientBoostingClassifier(),
        # "ExtraTrees": ExtraTreesClassifier(),
        # "LogisticRegression": LogisticRegression(),
        # "LinearDiscriminantAnalysis": LinearDiscriminantAnalysis()
    }
    for key in classifiers:
        utils.print_success(key)
        clf = classifiers[key]
        utils.print_info("\tFit")
        clf.fit(train_features, train_groundtruths)
        utils.print_info("\tPredict")
        predictions = clf.predict(test_features)
        print("Precision weighted\t" + str(precision_score(test_groundtruths, predictions, average='weighted')))
        print("Recall weighted\t" + str(recall_score(test_groundtruths, predictions, average='weighted')))
        print("F1 weighted\t" + str(f1_score(test_groundtruths, predictions, average='weighted')))
        # print("Precision weighted\t" + str(precision_score(test_groundtruths, predictions, average=None)))
        # print("Recall weighted\t" + str(recall_score(test_groundtruths, predictions, average=None)))
        # print("F1 weighted\t" + str(f1_score(test_groundtruths, predictions, average=None)))
def main():
    # load and split data
    start = time()
    # check if the data file exists
    data_file = "data/creditcard.csv"
    if not isfile(data_file):
        try:
            # download the data set
            # Note: it is around 180MB
            data_url = "https://github.com/nsethi31/Kaggle-Data-Credit-Card-Fraud-Detection/raw/master/creditcard.csv"
            urlretrieve(data_url, data_file)
            print("downloaded data file to %s" % data_file)
        except Exception:
            print("can't access or download the data set")
            print("please try to download it manually and put it into data/creditcard.csv")
            sys.exit()
    dataset, target = load_dataset(data_file)
    print("Loaded data in %.4f seconds" % (time() - start))
    start = time()
    x_train, x_test, y_train, y_test = train_test_split(
        dataset, target, test_size=.2, random_state=42)
    print("Training set size: %d, testing set size: %d" %
          (len(x_train), len(x_test)))
    print("Prepared data for models in %.4f seconds" % (time() - start))
    scores = []
    models = {"GNB": GaussianNB(),
              "DT": DecisionTreeClassifier(max_depth=5),
              "MLP": MLPClassifier(alpha=1.0),
              # "LSVC": SVC(kernel="linear", C=0.025),  # very slow as there is too much data
              "NN": KNeighborsClassifier(),
              "RF": RandomForestClassifier(max_depth=5, n_estimators=10, max_features=1),
              "ABC": AdaBoostClassifier(),
              "SGD": SGDClassifier(),
              }
    names = []
    for k, model in models.items():
        print("Running %s" % k)
        start = time()
        fitted_model = model.fit(x_train, y_train)
        print("Training time: %.4f seconds" % (time() - start))
        start = time()
        y_predicted = fitted_model.predict(x_test)
        print("Testing time: %.4f seconds" % (time() - start))
        scores.append(display(y_test, y_predicted,
                              save="figures/" + k + ".png"))
        names.append(k)
    # scatter plot scores of all the models
    plot_scores(scores, names, save="figures/scores.png")
def classes_combination(self, sensors_set):
    features = list(self.dataset.get_sensors_set_features(sensors_set))
    class_combination = list(itertools.combinations(self.classes, 2))
    train = self.dataset.get_train.copy()
    test = self.dataset.get_test.copy()
    if not os.path.exists(const.DIR_RESULTS):
        os.makedirs(const.DIR_RESULTS)
    with open(const.DIR_RESULTS + "/" + str(sensors_set) + const.FILE_TWO_CLASSES_COMBINATION, 'w') as f:
        f.write("combination, algorithm, accuracy\n")
        for combination in class_combination:
            cc_train = train.loc[(train['target'] == combination[0]) | (train['target'] == combination[1])]
            cc_test = test.loc[(test['target'] == combination[0]) | (test['target'] == combination[1])]
            train_features, train_classes, test_features, test_classes = self.__get_sets_for_classification(
                cc_train, cc_test, features)
            # build all classifiers
            classifier_tree = tree.DecisionTreeClassifier()
            classifier_forest = RandomForestClassifier(n_estimators=const.PAR_RF_ESTIMATOR)
            classifier_nn = MLPClassifier(hidden_layer_sizes=(const.PAR_NN_NEURONS[sensors_set],),
                                          alpha=const.PAR_NN_ALPHA[sensors_set], max_iter=const.PAR_NN_MAX_ITER,
                                          tol=const.PAR_NN_TOL)
            classifier_svm = SVC(C=const.PAR_SVM_C[sensors_set], gamma=const.PAR_SVM_GAMMA[sensors_set],
                                 verbose=False)
            # train all classifiers
            classifier_tree.fit(train_features, train_classes)
            classifier_forest.fit(train_features, train_classes)
            classifier_nn.fit(train_features, train_classes)
            classifier_svm.fit(train_features, train_classes)
            # use the classifiers on the test set
            test_prediction_tree = classifier_tree.predict(test_features)
            test_prediction_forest = classifier_forest.predict(test_features)
            test_prediction_nn = classifier_nn.predict(test_features)
            test_prediction_svm = classifier_svm.predict(test_features)
            # evaluate the classifiers
            acc_tree = accuracy_score(test_classes, test_prediction_tree)
            acc_forest = accuracy_score(test_classes, test_prediction_forest)
            acc_nn = accuracy_score(test_classes, test_prediction_nn)
            acc_svm = accuracy_score(test_classes, test_prediction_svm)
            # print and record the results
            print(str(combination))
            print("DECISION TREE : ", str(acc_tree))
            f.write(str(combination) + ", DT ," + str(acc_tree) + "\n")
            print("RANDOM FOREST : ", str(acc_forest))
            f.write(str(combination) + ", RF ," + str(acc_forest) + "\n")
            print("NEURAL NETWORK : ", str(acc_nn))
            f.write(str(combination) + ", NN ," + str(acc_nn) + "\n")
            print("SUPPORT VECTOR MACHINE : ", str(acc_svm))
            f.write(str(combination) + ", SVM ," + str(acc_svm) + "\n")
# use different algorithms, leaving one subject out of training and testing only on that subject,
# considering all classes in the dataset as well as only the user classes
def genderclassify(sentence):
    """
    genderclassify tags with the help of a multilayer perceptron classifier
    trained over word vectors created with gensim's word2vec
    :param sentence: string to be tokenized and tagged
    :type sentence: string
    :return: Returns a list of tuples of the form [(token1, genderTag), (token2, genderTag)...]
    :rtype: list of tuples
    """
    sentences = sent.drawlist()
    sentences2 = sents.drawlist()
    sentences2.append(sentence)
    sentences = sentences + sentences2
    sentences = [tok.wordtokenize(i) for i in sentences]
    sentence = tok.wordtokenize(sentence)
    logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO)
    model = gensim.models.Word2Vec(sentences, size=10, min_count=1)
    pred = []
    for word in sentence:
        pred.append(model.wv[word].tolist())
    genders = gndr.drawlist()
    vector = [i[0] for i in genders]
    tags = [i[1] for i in genders]
    print(tags)
    X = vector
    y = tags
    clf = MLPClassifier(solver='sgd', alpha=1e-5,
                        hidden_layer_sizes=(5, 2), random_state=1)
    clf.fit(X, y)
    predictions = clf.predict_proba(pred).tolist()
    predictions = [genderdecode(i) for i in predictions]
    print(predictions)
    for index, item in enumerate(sentence):
        sentence[index] = (sentence[index], predictions[index])
    return sentence
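A hypothetical call; the input string should be in the language of the `sent`/`sents` corpora, and the tag values come from `genderdecode`:

tagged = genderclassify("some input sentence")  # illustrative input, not from the original
# -> [('some', tag), ('input', tag), ('sentence', tag)]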
def crossValidate(document_term_matrix, labels, classifier="SVM", nfold=2):
    clf = None
    precision = []
    recall = []
    fscore = []
    a_score = []  # accuracy per fold (was used below without being initialized)
    if classifier == "NN":
        clf = MLPClassifier(hidden_layer_sizes=(50,), activation='relu', solver='sgd', alpha=1e-2, random_state=None)
    elif classifier == "LR":
        clf = linear_model.LogisticRegression(C=1e3)
        # clf = tree.DecisionTreeClassifier()
    elif classifier == "RF":
        clf = RandomForestClassifier()
    elif classifier == "NB":
        clf = GaussianNB()
    elif classifier == "SVM":
        clf = LinearSVC()
    elif classifier == "KNN":
        clf = NearestCentroid()
    skf = StratifiedKFold(n_splits=nfold, shuffle=True)
    y_test_total = []
    y_pred_total = []
    for train_index, test_index in skf.split(document_term_matrix, labels):
        X_train, X_test = document_term_matrix[train_index], document_term_matrix[test_index]
        y_train, y_test = labels[train_index], labels[test_index]
        y_test_total.extend(y_test.tolist())
        model = clf.fit(X_train, y_train)
        y_pred = model.predict(X_test)
        y_pred_total.extend(y_pred.tolist())
        p, r, f, s = precision_recall_fscore_support(y_test, y_pred, average='weighted')
        print(accuracy_score(y_test, y_pred))
        a_score.append(accuracy_score(y_test, y_pred))
        precision.append(p)
        recall.append(r)
        fscore.append(f)
    plot_learning_curve(clf, "Learning Curves", document_term_matrix, labels, ylim=None, cv=skf, n_jobs=1, train_sizes=np.linspace(.1, 1.0, 5))
    plt.savefig('lc.png')
    return pd.Series(y_test_total), pd.Series(y_pred_total), np.mean(precision), np.mean(recall), np.mean(fscore), np.mean(a_score)
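A hedged usage sketch on made-up documents, assuming the module-level helpers the function relies on (plot_learning_curve, plt, pd) are importable; the dense conversion matters because GaussianNB is among the options and rejects sparse input:

import numpy as np
from sklearn.feature_extraction.text import CountVectorizer

docs = ["spam spam ham", "ham ham eggs", "spam eggs ham", "eggs ham ham"]
labels = np.array([1, 0, 1, 0])
dtm = CountVectorizer().fit_transform(docs).toarray()  # dense document-term matrix
y_true, y_pred, prec, rec, f1, acc = crossValidate(dtm, labels, classifier="NN", nfold=2)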
def getSKLearnModel(modelName):
    if modelName == 'LinearRegression':
        model = linear_model.LinearRegression()
    elif modelName == 'BayesianRidge':
        model = linear_model.BayesianRidge()
    elif modelName == 'ARDRegression':
        model = linear_model.ARDRegression()
    elif modelName == 'ElasticNet':
        model = linear_model.ElasticNet()
    elif modelName == 'HuberRegressor':
        model = linear_model.HuberRegressor()
    elif modelName == 'Lasso':
        model = linear_model.Lasso()
    elif modelName == 'LassoLars':
        model = linear_model.LassoLars()
    elif modelName == 'Ridge':
        model = linear_model.Ridge()
    elif modelName == 'SGDRegressor':
        model = linear_model.SGDRegressor()
    elif modelName == 'SVR':
        model = SVR()
    elif modelName == 'MLPClassifier':
        model = MLPClassifier()
    elif modelName == 'KNeighborsClassifier':
        model = KNeighborsClassifier()
    elif modelName == 'SVC':
        model = SVC()
    elif modelName == 'GaussianProcessClassifier':
        model = GaussianProcessClassifier()
    elif modelName == 'DecisionTreeClassifier':
        model = DecisionTreeClassifier()
    elif modelName == 'RandomForestClassifier':
        model = RandomForestClassifier()
    elif modelName == 'AdaBoostClassifier':
        model = AdaBoostClassifier()
    elif modelName == 'GaussianNB':
        model = GaussianNB()
    elif modelName == 'LogisticRegression':
        model = linear_model.LogisticRegression()
    elif modelName == 'QuadraticDiscriminantAnalysis':
        model = QuadraticDiscriminantAnalysis()
    return model
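The two helpers pair naturally; a small smoke-test sketch on synthetic data (the list mixes regressors and classifiers, hence the defensive try/except):

from sklearn.datasets import make_classification

X, y = make_classification(n_samples=100, random_state=0)
for name in getModels():
    model = getSKLearnModel(name)
    try:
        model.fit(X, y)
        print(name, model.score(X, y))  # R^2 for regressors, accuracy for classifiers
    except Exception as exc:
        print(name, "failed:", exc)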
def test_gradient():
    # Test gradient.
    # This makes sure that the activation functions and their derivatives
    # are correct. The numerical and analytical computation of the gradient
    # should be close.
    for n_labels in [2, 3]:
        n_samples = 5
        n_features = 10
        X = np.random.random((n_samples, n_features))
        y = 1 + np.mod(np.arange(n_samples) + 1, n_labels)
        Y = LabelBinarizer().fit_transform(y)
        for activation in ACTIVATION_TYPES:
            mlp = MLPClassifier(activation=activation, hidden_layer_sizes=10,
                                solver='lbfgs', alpha=1e-5,
                                learning_rate_init=0.2, max_iter=1,
                                random_state=1)
            mlp.fit(X, y)
            theta = np.hstack([l.ravel() for l in mlp.coefs_ +
                               mlp.intercepts_])
            layer_units = ([X.shape[1]] + [mlp.hidden_layer_sizes] +
                           [mlp.n_outputs_])
            activations = []
            deltas = []
            coef_grads = []
            intercept_grads = []
            activations.append(X)
            for i in range(mlp.n_layers_ - 1):
                activations.append(np.empty((X.shape[0],
                                             layer_units[i + 1])))
                deltas.append(np.empty((X.shape[0],
                                        layer_units[i + 1])))
                fan_in = layer_units[i]
                fan_out = layer_units[i + 1]
                coef_grads.append(np.empty((fan_in, fan_out)))
                intercept_grads.append(np.empty(fan_out))

            # analytically compute the gradients
            def loss_grad_fun(t):
                return mlp._loss_grad_lbfgs(t, X, Y, activations, deltas,
                                            coef_grads, intercept_grads)

            [value, grad] = loss_grad_fun(theta)
            numgrad = np.zeros(np.size(theta))
            n = np.size(theta, 0)
            E = np.eye(n)
            epsilon = 1e-5
            # numerically compute the gradients
            for i in range(n):
                dtheta = E[:, i] * epsilon
                numgrad[i] = ((loss_grad_fun(theta + dtheta)[0] -
                               loss_grad_fun(theta - dtheta)[0]) /
                              (epsilon * 2.0))
            assert_almost_equal(numgrad, grad)
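The same central-difference check in isolation, against a function whose gradient is known exactly (pure NumPy, no private scikit-learn API):

import numpy as np

def f(w):
    return 0.5 * np.dot(w, w)  # analytical gradient is w itself

w = np.random.random(5)
eps = 1e-5
# perturb one coordinate at a time and take the symmetric difference quotient
numgrad = np.array([(f(w + eps * e) - f(w - eps * e)) / (2 * eps)
                    for e in np.eye(len(w))])
assert np.allclose(numgrad, w, atol=1e-6)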