def get_feature_importance(self, clf, model_name):
clfs = {'RandomForestClassifier':'feature_importances',
'ExtraTreesClassifier': 'feature_importances',
'AdaBoostClassifier': 'feature_importances',
'LogisticRegression': 'coef',
'svm.SVC': 'coef',
'GradientBoostingClassifier': 'feature_importances',
'GaussianNB': None,
'DecisionTreeClassifier': 'feature_importances',
'SGDClassifier': 'coef',
'KNeighborsClassifier': None,
'linear.SVC': 'coef'}
    attr = clfs.get(model_name)  # .get avoids a KeyError for unlisted models
    if attr == 'feature_importances':
        return list(clf.feature_importances_)
    elif attr == 'coef':
        return clf.coef_.tolist()  # .tolist() already returns a list
    else:
        return None
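# Hedged usage sketch of the attribute logic above (illustrative data and
# models; the real method lives on a wrapper object holding the table):
from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression

X, y = make_classification(n_samples=200, n_features=5, random_state=0)
rf = RandomForestClassifier(n_estimators=50, random_state=0).fit(X, y)
print(list(rf.feature_importances_))  # what the 'feature_importances' branch returns
lr = LogisticRegression(max_iter=1000).fit(X, y)
print(lr.coef_.tolist())              # what the 'coef' branch returns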
Example source code for Python's AdaBoostClassifier()
def get_classifier_class(class_name):
name_table = {
'svm': SVC,
'k_neighbors': KNeighborsClassifier,
'gaussian_process': GaussianProcessClassifier,
'decision_tree': DecisionTreeClassifier,
'random_forest': RandomForestClassifier,
'ada_boost': AdaBoostClassifier,
'mlp': MLPClassifier,
'gaussian_naive_bayes': GaussianNB,
'quadratic_discriminant_analysis': QuadraticDiscriminantAnalysis
}
if class_name not in name_table:
raise ValueError('No such classifier')
return name_table[class_name]
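# Hedged usage sketch: the lookup returns a class, not an instance, so the
# caller instantiates it with its own parameters (values here are illustrative):
clf_class = get_classifier_class('random_forest')  # -> RandomForestClassifier
clf = clf_class(n_estimators=100, n_jobs=-1)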
def define_model(self, model, parameters, n_cores=0):
clfs = {'RandomForestClassifier': RandomForestClassifier(n_estimators=50, n_jobs=7),
'ExtraTreesClassifier': ExtraTreesClassifier(n_estimators=10, n_jobs=7, criterion='entropy'),
'AdaBoostClassifier': AdaBoostClassifier(DecisionTreeClassifier(max_depth=1), algorithm="SAMME", n_estimators=200),
'LogisticRegression': LogisticRegression(penalty='l1', C=1e5),
'svm.SVC': svm.SVC(kernel='linear', probability=True, random_state=0),
'GradientBoostingClassifier': GradientBoostingClassifier(learning_rate=0.05, subsample=0.5, max_depth=6, n_estimators=10),
'GaussianNB': GaussianNB(),
'DecisionTreeClassifier': DecisionTreeClassifier(),
'SGDClassifier': SGDClassifier(loss="hinge", penalty="l2", n_jobs=7),
'KNeighborsClassifier': KNeighborsClassifier(n_neighbors=3),
'linear.SVC': svm.LinearSVC() }
if model not in clfs:
raise ConfigError("Unsupported model {}".format(model))
clf = clfs[model]
clf.set_params(**parameters)
return clf
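# The default-then-override pattern used by define_model, shown standalone
# (parameter values are illustrative, not the table's defaults):
from sklearn.ensemble import AdaBoostClassifier
clf = AdaBoostClassifier(n_estimators=200)
clf.set_params(n_estimators=100, learning_rate=0.5)  # caller-supplied parameters win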
def __init__(self, isTrain, isOutlierRemoval):
super(ClassificationAdaBoost, self).__init__(isTrain, isOutlierRemoval)
# data preprocessing
self.dataPreprocessing()
self.dt_stump = DecisionTreeClassifier(max_depth=10)
self.ada = AdaBoostClassifier(
base_estimator=self.dt_stump,
learning_rate=1,
n_estimators=7,
algorithm="SAMME.R")
# self.dt_stump = DecisionTreeClassifier(max_depth=14)
# self.ada = AdaBoostClassifier(
# base_estimator=self.dt_stump,
# learning_rate=1,
# n_estimators=50,
# algorithm="SAMME")
def __init__(
        self, data_block, predictors=[], cv_folds=10,
        scoring_metric='accuracy', additional_display_metrics=[]):
    base_classification.__init__(
        self, alg=AdaBoostClassifier(), data_block=data_block,
        predictors=predictors, cv_folds=cv_folds,
        scoring_metric=scoring_metric,
        additional_display_metrics=additional_display_metrics
    )
self.model_output = pd.Series(self.default_parameters)
self.model_output['Feature_Importance'] = "-"
#Set parameters to default values:
self.set_parameters(set_default=True)
def get_classifier(self):
    algo = self.algo
    if algo == "GBT":
        return GradientBoostingClassifier()
    elif algo == "RF":
        return RandomForestClassifier()
    elif algo == "ADB":
        return AdaBoostClassifier()
    elif algo == "DT":
        return DecisionTreeClassifier()
    elif algo == "NB":
        return BernoulliNB()
    elif algo == "SGD":
        return SGDClassifier()
    elif algo == "SVC":
        return LinearSVC()
    elif algo == "MLPC":
        return MLPClassifier(activation='logistic', batch_size='auto',
                             early_stopping=True, hidden_layer_sizes=(100,), learning_rate='adaptive',
                             learning_rate_init=0.1, max_iter=5000, random_state=1,
                             solver='lbfgs', tol=0.0001, validation_fraction=0.1, verbose=False,
                             warm_start=False)
    raise ValueError("Unknown algorithm: {}".format(algo))  # was `return 0`, which silently broke callers
def performAdaBoostClass(X_train, y_train, X_test, y_test, fout, savemodel):
    """
    AdaBoost binary classification
    """
    # n = parameters[0]
    # l = parameters[1]
    clf = AdaBoostClassifier()
    clf.fit(X_train, y_train)
    # if savemodel == True:
    #     fname_out = '{}-{}.pickle'.format(fout, datetime.now())
    #     with open(fname_out, 'wb') as f:
    #         cPickle.dump(clf, f, -1)
    accuracy = clf.score(X_test, y_test)
    print("AdaBoost: ", accuracy)
def buildModel(dataset, method, parameters):
"""
Build final model for predicting real testing data
"""
features = dataset.columns[0:-1]
if method == 'RNN':
clf = performRNNlass(dataset[features], dataset['UpDown'])
return clf
elif method == 'RF':
clf = RandomForestClassifier(n_estimators=1000, n_jobs=-1)
elif method == 'KNN':
clf = neighbors.KNeighborsClassifier()
elif method == 'SVM':
c = parameters[0]
g = parameters[1]
clf = SVC(C=c, gamma=g)
elif method == 'ADA':
clf = AdaBoostClassifier()
return clf.fit(dataset[features], dataset['UpDown'])
def learn(x, y, test_x):
    # assign a per-sample weight based on the class label
    weight_list = []
    for j in range(len(y)):
        if y[j] == "0":
            weight_list.append(variables.weight_0_ada)
        elif y[j] == "1000":
            weight_list.append(variables.weight_1000_ada)
        elif y[j] == "1500":
            weight_list.append(variables.weight_1500_ada)
        elif y[j] == "2000":
            weight_list.append(variables.weight_2000_ada)
    clf = AdaBoostClassifier(
        n_estimators=variables.n_estimators_ada,
        learning_rate=variables.learning_rate_ada,
    ).fit(x, y, np.asarray(weight_list))
    prediction_list = clf.predict(test_x)
    prediction_list_prob = clf.predict_proba(test_x)
    return prediction_list, prediction_list_prob
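# A vectorized way to build the same weight vector (a sketch: the labels match
# the function above, but the weight values stand in for the `variables` module):
import numpy as np
weight_map = {"0": 1.0, "1000": 2.0, "1500": 3.0, "2000": 4.0}
y_example = np.array(["0", "1000", "0", "2000"])
weights = np.vectorize(weight_map.get)(y_example)
print(weights)  # [1. 2. 1. 4.]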
def init_clf(clf_used, params=None):
if params is not None:
params_used = params
elif clf_used == 'svm':
params_used = svm_params
elif clf_used == 'ada_boost':
params_used = rf_params
elif clf_used == 'lr':
params_used = lr_params
else:
params_used = rf_params
if clf_used == 'svm':
clf = SVC(**params_used)
elif clf_used == 'ada_boost':
rf = RandomForestClassifier(**rf_params)
clf = AdaBoostClassifier(base_estimator=rf, **params_used)
elif clf_used == 'lr':
clf = LogisticRegressionCV(**params_used)
else:
clf = RandomForestClassifier(**params_used)
return clf
classify.py (project: Stock-Market-Analysis-and-Prediction, author: samshara)
def performAdaBoostClass(X_train, y_train, X_test, y_test, parameters, fout, savemodel):
"""
    AdaBoost binary classification
"""
# n = parameters[0]
# l = parameters[1]
clf = AdaBoostClassifier()
clf.fit(X_train, y_train)
    if savemodel:
#fname_out = '{}-{}.pickle'.format(fout, datetime.now())
fname_out = fout + '.pickle'
with open(fname_out, 'wb') as f:
pickle.dump(clf, f, -1)
accuracy = clf.score(X_test, y_test)
return accuracy
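# Loading the pickled model back (complements the save path above; the file
# name mirrors the `fout + '.pickle'` convention, with 'model' as a stand-in):
import pickle
with open('model.pickle', 'rb') as f:
    clf = pickle.load(f)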
def test_AdaBoostClassifier(*data):
    '''
    Test AdaBoost score as the number of base estimators grows.
    :param data: train_data, test_data, train_value, test_value
    :return: None
    '''
X_train,X_test,y_train,y_test=data
clf=ensemble.AdaBoostClassifier(learning_rate=0.1)
clf.fit(X_train,y_train)
## graph
fig=plt.figure()
ax=fig.add_subplot(1,1,1)
estimators_num=len(clf.estimators_)
X=range(1,estimators_num+1)
    ax.plot(list(X), list(clf.staged_score(X_train, y_train)), label="Training score")
    ax.plot(list(X), list(clf.staged_score(X_test, y_test)), label="Testing score")
ax.set_xlabel("estimator num")
ax.set_ylabel("score")
ax.legend(loc="best")
ax.set_title("AdaBoostClassifier")
plt.show()
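# staged_score, as plotted above, yields one accuracy per boosting round, so the
# final staged value matches score(). Minimal standalone check on illustrative data:
from sklearn.datasets import load_iris
from sklearn.ensemble import AdaBoostClassifier
from sklearn.model_selection import train_test_split

X_tr, X_te, y_tr, y_te = train_test_split(*load_iris(return_X_y=True), random_state=0)
ada = AdaBoostClassifier(n_estimators=20).fit(X_tr, y_tr)
print(list(ada.staged_score(X_te, y_te))[-1] == ada.score(X_te, y_te))  # True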
def test_AdaBoostClassifier_learning_rate(*data):
    '''
    Test performance with different learning rates.
    :param data: train_data, test_data, train_value, test_value
    :return: None
    '''
X_train,X_test,y_train,y_test=data
learning_rates=np.linspace(0.01,1)
fig=plt.figure()
ax=fig.add_subplot(1,1,1)
    training_scores = []
    testing_scores = []
    for learning_rate in learning_rates:
        clf = ensemble.AdaBoostClassifier(learning_rate=learning_rate, n_estimators=500)
        clf.fit(X_train, y_train)
        training_scores.append(clf.score(X_train, y_train))
        testing_scores.append(clf.score(X_test, y_test))
    ax.plot(learning_rates, training_scores, label="Training score")
    ax.plot(learning_rates, testing_scores, label="Testing score")
ax.set_xlabel("learning rate")
ax.set_ylabel("score")
ax.legend(loc="best")
ax.set_title("AdaBoostClassifier")
plt.show()
def test_gridsearch():
# Check that base trees can be grid-searched.
# AdaBoost classification
boost = AdaBoostClassifier(base_estimator=DecisionTreeClassifier())
parameters = {'n_estimators': (1, 2),
'base_estimator__max_depth': (1, 2),
'algorithm': ('SAMME', 'SAMME.R')}
clf = GridSearchCV(boost, parameters)
clf.fit(iris.data, iris.target)
# AdaBoost regression
boost = AdaBoostRegressor(base_estimator=DecisionTreeRegressor(),
random_state=0)
parameters = {'n_estimators': (1, 2),
'base_estimator__max_depth': (1, 2)}
clf = GridSearchCV(boost, parameters)
clf.fit(boston.data, boston.target)
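# Reading the fitted search back out (standard GridSearchCV attributes):
print(clf.best_params_)  # e.g. {'base_estimator__max_depth': 2, 'n_estimators': 2}
print(clf.best_score_)   # mean cross-validated score of that combination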
def classify(train=None, test=None, data=None, res_dir="res/", disp=True, outfilename=None):
"""Description of compare
compare multiple classifier and display the best one
"""
utils.print_success("Comparison of differents classifiers")
if data is not None:
train_features = data["train_features"]
train_groundtruths = data["train_groundtruths"]
test_features = data["test_features"]
test_groundtruths = data["test_groundtruths"]
else:
train = utils.abs_path_file(train)
test = utils.abs_path_file(test)
train_features, train_groundtruths = read_file(train)
test_features, test_groundtruths = read_file(test)
if not utils.create_dir(res_dir):
res_dir = utils.abs_path_dir(res_dir)
classifiers = {
"RandomForest": RandomForestClassifier(n_jobs=-1)
# "RandomForest": RandomForestClassifier(n_estimators=5),
# "KNeighbors":KNeighborsClassifier(3),
# "GaussianProcess":GaussianProcessClassifier(1.0 * RBF(1.0), warm_start=True),
# "DecisionTree":DecisionTreeClassifier(max_depth=5),
# "MLP":MLPClassifier(),
# "AdaBoost":AdaBoostClassifier(),
# "GaussianNB":GaussianNB(),
# "QDA":QuadraticDiscriminantAnalysis(),
# "SVM":SVC(kernel="linear", C=0.025),
# "GradientBoosting":GradientBoostingClassifier(),
# "ExtraTrees":ExtraTreesClassifier(),
# "LogisticRegression":LogisticRegression(),
# "LinearDiscriminantAnalysis":LinearDiscriminantAnalysis()
}
for key in classifiers:
utils.print_success(key)
clf = classifiers[key]
utils.print_info("\tFit")
clf.fit(train_features, train_groundtruths)
utils.print_info("\tPredict")
predictions = clf.predict(test_features)
return predictions
def constructModel(corpus, classList, features, modelOutput):
"""
Trains a Decision Tree model on the test corpus.
Args:
corpus: A list of lists, containing the GC content, coverage, and class number.
classList: A list of class names.
features: List of variables used by each contig.
modelOutput: Location to save model as GraphViz DOT, or False to save no model.
Returns:
classifier: A DecisionTreeClassifier object that has been trained on the test corpus.
"""
corpus.sort() # just in case
X = []
Y = []
for item in corpus:
X.append(item[:-1]) # all but the last item
Y.append(item[-1]) # only the last item
X_train, X_test, Y_train, Y_test = mscv.train_test_split(X, Y, test_size=0.3, random_state=0)
# TODO: implement classifier testing and comparison, now only baggingClassifier is used as per paper
#treeClassifier = tree.DecisionTreeClassifier()
#treeClassifier = treeClassifier.fit(X_train, Y_train)
#click.echo("Decision tree classifier built, score is %s out of 1.00" % treeClassifier.score(X_test, Y_test))
baggingClassifier = ensemble.BaggingClassifier()
baggingClassifier = baggingClassifier.fit(X_train, Y_train)
click.echo("Bagging classifier built, score is %s out of 1.00" % baggingClassifier.score(X_test, Y_test))
#forestClassifier = ensemble.RandomForestClassifier(n_estimators=10)
#forestClassifier = forestClassifier.fit(X_train, Y_train)
#click.echo("Random forest classifier built, score is %s out of 1.00" % forestClassifier.score(X_test, Y_test))
#adaClassifier = ensemble.AdaBoostClassifier(n_estimators=100)
#adaClassifier = adaClassifier.fit(X_train, Y_train)
#click.echo("AdaBoost classifier built, score is %s out of 1.00" % adaClassifier.score(X_test, Y_test))
#gradientClassifier = ensemble.GradientBoostingClassifier(n_estimators=100)
#gradientClassifier = gradientClassifier.fit(X_train, Y_train)
#click.echo("Gradient tree boosting classifier built, score is %s out of 1.00" % gradientClassifier.score(X_test, Y_test))
    if modelOutput:
        # export_graphviz expects a single decision tree, not the whole
        # ensemble, so export the bagging classifier's first tree
        with open(modelOutput, 'w') as dotfile:
            tree.export_graphviz(baggingClassifier.estimators_[0], out_file=dotfile, feature_names=features,
                                 class_names=classList, filled=True, rounded=True, special_characters=True)
    return baggingClassifier
def adaboost(train, test, smoteit=True):
"ADABOOST"
if smoteit:
train = SMOTE(train)
clf = AdaBoostClassifier()
train_DF = formatData(train)
test_DF = formatData(test)
features = train_DF.columns[:-2]
klass = train_DF[train_DF.columns[-2]]
# set_trace()
clf.fit(train_DF[features], klass)
preds = clf.predict(test_DF[test_DF.columns[:-2]]).tolist()
return preds
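# If the SMOTE helper above implements the usual oversampling technique, the
# imbalanced-learn equivalent would be the following (an assumption: the
# snippet's SMOTE works on its own row format rather than X/y arrays):
# from imblearn.over_sampling import SMOTE
# X_resampled, y_resampled = SMOTE().fit_resample(X_train, y_train)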
def adaBoost(self, settings, data=None, dropna=True):
df = self.__loadData(data, dropna)
features = df.columns[:-1]
X = df[features]
y = df.iloc[:, -1].values
seed = 7
num_trees = 500
    kfold = model_selection.KFold(n_splits=10, shuffle=True, random_state=seed)  # random_state requires shuffle=True in current scikit-learn
    print(kfold)
model = AdaBoostClassifier(n_estimators=num_trees, random_state=seed)
results = model_selection.cross_val_score(model, X, y, cv=kfold)
model.fit(X, y)
    print(results.mean())
    print(model.score(X, y))
return True
def classification(lead):
#classifiers = [
# ('ab', AdaBoostClassifier()),
# ('dt', DecisionTreeClassifier(max_depth=5)),
# ('kn', KNeighborsClassifier(16)),
#]
inputs = get_dataset_input_from_database(lead.keys())
outputs = get_dataset_output_from_database()
print('The total number of examples in the dataset is: %d' % (len(inputs)))
inputs_training, inputs_test, outputs_training, outputs_test = train_test_split(inputs, outputs, test_size=0.3, random_state=42)
print('The number of examples used for training are: %d' % (len(inputs_training)))
print('The number of examples used for testing are: %d' % (len(inputs_test)))
knn = KNeighborsClassifier(n_neighbors=7, p=2)
knn.fit(inputs_training, np.ravel(outputs_training))
print('[K=7] The probability of the algorithm to be right is: %f%%' % (knn.score(inputs_test, outputs_test) * 100))
#voting_classifier = VotingClassifier(estimators=classifiers, voting='hard')
#voting_classifier = voting_classifier.fit(inputs_training, np.ravel(outputs_training))
#print('The probability of the machine to be right is: %f%%' % (voting_classifier.score(inputs_test, outputs_test) * 100))
print('Lead data:')
print(lead)
data_to_predict = convert_dict_to_tuple(lead)
print('Lead data to predict:')
print(data_to_predict)
    lead_status = knn.predict([data_to_predict])  # wrap the single sample: scikit-learn expects a 2D array
lead_status_value = lead_status[0]
#lead_status = voting_classifier.predict(data_to_predict)
print('According to lead data, his status is: %d' % (lead_status_value))
print('[0] unqualified [1] qualified')
    proba = knn.predict_proba([data_to_predict])
max_proba = max(proba[0])
print('Proba is: %d%%' %(max_proba*100))
    lead_status_dict = {'value': str(lead_status_value), 'proba': str(max_proba)}
return lead_status_dict
test.py (project: Audio-classification-using-Bag-of-Frames-approach, author: amogh3892)
def adaboost_predict(training_samples, training_labels, test_samples, test_labels, n_estimators=50, learning_rate=1.0):
    from sklearn.ensemble import AdaBoostClassifier
    clf = AdaBoostClassifier(n_estimators=n_estimators, learning_rate=learning_rate)
t0 = time()
clf.fit(training_samples,training_labels)
training_time = round(time()-t0, 3)
t0 = time()
pred = clf.predict(test_samples)
test_time = round(time()-t0, 3)
from sklearn.metrics import accuracy_score
    acc = accuracy_score(test_labels, pred)  # (y_true, y_pred) order
no_features = np.array(training_samples).shape[1]
training_samples = np.array(training_samples).shape[0]
test_samples = np.array(test_samples).shape[0]
with open("Temp\\results.txt","w") as outfile:
outfile.write("Alogirthm : {}\n".format("Adaboost"))
outfile.write("Estimators = {}\n".format(n_estimators))
outfile.write("Learning rate = {}\n".format(learning_rate))
outfile.write("No of features : {}\n".format(no_features))
outfile.write("No of training samples : {}\n".format(training_samples))
outfile.write("No of test samples : {}\n".format(test_samples))
outfile.write("Training time : {}\n".format(training_time))
outfile.write("Test time : {}\n".format(test_time))
outfile.write("Accuracy : {}\n".format(acc))
with open("Temp\\result_labels.csv","wb") as outfile:
np.savetxt(outfile,pred)
ClassificationHmmGeneralize.py (project: AirTicketPredicting, author: junlulocky)
def __init__(self, isTrain):
super(ClassificationHmmGeneralize, self).__init__(isTrain)
# data preprocessing
self.dataPreprocessing()
self.dt_stump = DecisionTreeClassifier(max_depth=10)
self.ada = AdaBoostClassifier(
base_estimator=self.dt_stump,
learning_rate=1,
n_estimators=5,
algorithm="SAMME.R")
# load the general data
# feature 0~7: flight number dummy variables
# feature 8: departure date; feature 9: observed date state;
# feature 10: minimum price; feature 11: maximum price
# feature 12: output; feature 13: current price
# feature 14: flight index
self.X_general = np.load('inputGeneralClf_HmmParsed/X_train.npy')
self.y_general = np.load('inputGeneralClf_HmmParsed/y_train.npy')
self.y_general = self.y_general.reshape((self.y_general.shape[0], 1))
self.y_general_price = np.load('inputGeneralClf_HmmParsed/y_train_price.npy')
self.y_general_price = self.y_general_price.reshape((self.y_general_price.shape[0], 1))
self.y_general_index = np.load('inputGeneralClf_HmmParsed/y_index.npy')
self.y_general_index = self.y_general_index.reshape((self.y_general_index.shape[0], 1))
self.routes_general = ["BGY_OTP", # route 1
"BUD_VKO", # route 2
"CRL_OTP", # route 3
"CRL_WAW", # route 4
"LTN_OTP", # route 5
"LTN_PRG", # route 6
"OTP_BGY", # route 7
"OTP_CRL", # route 8
"OTP_LTN", # route 9
"PRG_LTN", # route 10
"VKO_BUD", # route 11
"WAW_CRL"] # route 12
def get_data_preprocessor_balancing(params, y):
d_balancing = params['layer_dict_list'][1]
if params['balancing'] == str(d_balancing['None']) or params['balancing'] == 'None':
# for fp: ['ExtraTreesClassifier', 'LinearSVC'] + clf: ['DecisionTreeClassifier', 'ExtraTreesClassifier', 'LinearSVC', 'SVC', 'RandomForestClassifier', 'SGDClassifier']
params['class_weight'] = None
        # for clf: ['AdaBoostClassifier', 'GradientBoostingClassifier']
params['sample_weight'] = None
elif params['balancing'] == str(d_balancing['weighting']) or params['balancing'] == 'weighting':
# for fp: ['ExtraTreesClassifier', 'LinearSVC'] + clf: ['DecisionTreeClassifier', 'ExtraTreesClassifier', 'LinearSVC', 'SVC', 'RandomForestClassifier', 'SGDClassifier']
        params['class_weight'] = 'balanced'  # 'auto' was renamed to 'balanced' in scikit-learn 0.17+
# for clf: ['AdaBoostClassifier', 'GradientBoostingClassifier']
if len(y.shape) > 1:
offsets = [2 ** i for i in range(y.shape[1])]
y_ = np.sum(y * offsets, axis=1)
else:
y_ = y
unique, counts = np.unique(y_, return_counts=True)
cw = 1. / counts
cw = cw / np.mean(cw)
sample_weight = np.ones(y_.shape)
for i, ue in enumerate(unique):
mask = y_ == ue
sample_weight[mask] *= cw[i]
params['sample_weight'] = sample_weight
return params
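# Worked example of the weighting above: class counts [90, 10] give raw weights
# 1/90 and 1/10, which are then rescaled so that their mean is 1:
import numpy as np
counts = np.array([90, 10])
cw = 1. / counts
cw = cw / np.mean(cw)
print(cw)  # [0.2 1.8]: each minority-class sample weighs 9x a majority one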
def generate_filter(X_train, y_train):
# clf = RidgeClassifierCV(alphas=[0.01, 0.1, 1, 10])
clf = RandomForestClassifier(n_jobs=4)
# clf = AdaBoostClassifier()
clf.fit(X_train, y_train)
return clf
Adaboost.py (project: Machine-Learning-Tools-on-Iris-Dataset, author: debjitpaul)
def perform_adaboost(self, X_train_std, y_train, X_test_std, y_test):  # perform AdaBoost
ada = AdaBoostClassifier(n_estimators=10)
ada.fit(X_train_std, y_train)
    train_score = cross_val_score(ada, X_train_std, y_train)
    print('The training accuracy is {:.2f}%'.format(train_score.mean() * 100))
    test_score = cross_val_score(ada, X_test_std, y_test)
    print('The test accuracy is {:.2f}%'.format(test_score.mean() * 100))
X=X_test_std
y=y_test
resolution=0.01
#Z = svm.predict(np.array([xx1.ravel(), xx2.ravel()]).T)
markers = ('s', 'x', 'o', '^', 'v')
colors = ('red', 'blue', 'green', 'gray', 'cyan')
cmap = ListedColormap(colors[:len(np.unique(y_test))])
X=X_test_std
y=y_test
# plot the decision surface
x1_min, x1_max = X[:, 0].min() - 1, X[:, 0].max() + 1
x2_min, x2_max = X[:, 1].min() - 1, X[:, 1].max() + 1
xx1, xx2 = np.meshgrid(np.arange(x1_min, x1_max, resolution),
np.arange(x2_min, x2_max, resolution))
Z = ada.predict(np.array([xx1.ravel(), xx2.ravel()]).T)
Z = Z.reshape(xx1.shape)
plt.contourf(xx1, xx2, Z, alpha=0.3, cmap=cmap)
plt.xlim(xx1.min(), xx1.max())
plt.ylim(xx2.min(), xx2.max())
for idx, cl in enumerate(np.unique(y)):
plt.scatter(x=X[y == cl, 0], y=X[y == cl, 1],
alpha=0.5, c=cmap(idx),
marker=markers[idx], label=cl)
plt.show()
def fitAndPredict(self):
# classifier = LogisticRegression()
# classifier.fit(self.trainingSet, self.trainingLabel)
# pred_labels = classifier.predict(self.testSet)
# print 'Logistic:'
# print classification_report(self.testLabel, pred_labels)
classifier = SVC()
classifier.fit(self.trainingSet, self.trainingLabel)
pred_labels = {}
for user in self.testDict:
pred_labels[user] = classifier.predict([self.model.docvecs[user]])
# print 'SVM:'
# print classification_report(self.testLabel, pred_labels)
return pred_labels
# classifier = GradientBoostingClassifier(n_estimators=100, learning_rate=1.0,
# max_depth=1, random_state=0)
# classifier.fit(self.trainingSet, self.trainingLabel)
# pred_labels = classifier.predict(self.testSet)
# print 'GBDT:'
# print classification_report(self.testLabel, pred_labels)
#
# clf = AdaBoostClassifier(n_estimators=100)
# classifier.fit(self.trainingSet, self.trainingLabel)
# pred_labels = classifier.predict(self.testSet)
# print 'AdaBoost:'
# print classification_report(self.testLabel, pred_labels)
#
# clf = RandomForestClassifier(n_estimators=10)
# classifier.fit(self.trainingSet, self.trainingLabel)
# pred_labels = classifier.predict(self.testSet)
# print 'Random Forest:'
# print classification_report(self.testLabel, pred_labels)
def performAdaBoostClass(X_train, y_train, X_test, y_test, parameters, savemodel):
"""
    AdaBoost binary classification
"""
# n = parameters[0]
# l = parameters[1]
clf = AdaBoostClassifier()
clf.fit(X_train, y_train)
accuracy = clf.score(X_test, y_test)
return accuracy
def test_sample_weight_elm():
"""Smoke test - AdaBoostClassifier should work with ELMClassifer."""
X = Xdigits_binary[:50]
y = ydigits_binary[:50]
elm = ELMClassifier(n_hidden=20)
clf = AdaBoostClassifier(n_estimators=3, base_estimator=elm)
clf.fit(X, y)
assert_greater(clf.score(X, y), 0.9)
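# Note: AdaBoostClassifier requires the base estimator's fit() to accept
# sample_weight; the smoke test above works because ELMClassifier supports it.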
def getBestOne(self, name):
    # if a trained classifier has already been saved, load and return it
    try:
        from sklearn.externals import joblib
        clf = joblib.load(name + '.pkl')
        return clf
    except Exception:  # no cached model yet; fall through and train one
        pass
    # otherwise search for the number of boosting rounds that scores best
bestAccuracyRate, n_estimators = 0, 1
for loopTimes in range(2, 200):
sclf = AdaBoostClassifier(base_estimator=self.clf, learning_rate=1, n_estimators=loopTimes, algorithm='SAMME')
# cross validation to get the score
X_train, X_test, Y_train, Y_test = train_test_split(self.X, self.Y, test_size=0.1, random_state=0)
sclf.fit(X_train, Y_train)
accuracyRate = sclf.score(X_test, Y_test)
if accuracyRate > bestAccuracyRate:
bestAccuracyRate = accuracyRate
n_estimators = loopTimes
# save the classifier as a dump
joblib.dump(sclf, name + '.pkl')
return AdaBoostClassifier(base_estimator=self.clf, learning_rate=1, n_estimators=n_estimators, algorithm='SAMME')
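# The manual loop above is essentially a one-parameter grid search; a sketch of
# the standard equivalent (kept commented since self.clf, self.X, self.Y belong
# to the surrounding class):
# from sklearn.model_selection import GridSearchCV
# search = GridSearchCV(
#     AdaBoostClassifier(base_estimator=self.clf, learning_rate=1, algorithm='SAMME'),
#     {'n_estimators': list(range(2, 200))}, cv=10)
# search.fit(self.X, self.Y)
# best_n = search.best_params_['n_estimators']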
def ada_boost_classifier(self, data, target, learning_rate=1, n_estimators=400, enable_ada=False):
ada_boost = AdaBoostClassifier(
base_estimator=self.clf,
learning_rate=learning_rate,
n_estimators=n_estimators,
algorithm="SAMME.R")
ada_boost.fit(data, target)
if not enable_ada:
self.clf = ada_boost
print "AdaBoost training finished"