# requires module-level imports: LogisticRegression (sklearn.linear_model) and RandomizedSearchCV (sklearn.model_selection)
def train_classifier(self, trainvectors, labels, c='', solver='', dual='', penalty='', multiclass='', max_iterations=1000, iterations=10):
    if len(self.label_encoder.classes_) > 2:  # more than two classes to distinguish
        # the 'estimator__' prefix assumes the model is wrapped (cf. the commented-out OutputCodeClassifier below)
        parameters = ['estimator__C', 'estimator__solver', 'estimator__penalty', 'estimator__dual', 'estimator__multi_class']
        # multi = True
    else:  # only two classes to distinguish
        parameters = ['C', 'solver', 'penalty', 'dual', 'multi_class']
        # multi = False
    # candidate values: defaults when no argument is given, otherwise the space-separated values passed in
    c_values = [0.001, 0.005, 0.01, 0.5, 1, 5, 10, 50, 100, 500, 1000] if c == '' else [float(x) for x in c.split()]
    solver_values = ['newton-cg', 'lbfgs', 'liblinear', 'sag'] if solver == '' else solver.split()
    if penalty == '':
        # the l1 penalty is only an option when none of the selected solvers require l2
        if not {'newton-cg', 'lbfgs', 'sag'} & set(solver_values):
            penalty_values = ['l1', 'l2']
        else:
            penalty_values = ['l2']
    else:
        penalty_values = [penalty]
    if dual == '':
        # the dual formulation is only supported by liblinear with an l2 penalty
        if len(solver_values) == 1 and solver_values[0] == 'liblinear':
            if len(penalty_values) == 1 and penalty_values[0] == 'l2':
                dual_values = [True, False]
            else:
                dual_values = [False]
        else:  # other solvers only support the primal formulation
            dual_values = [False]
    else:
        dual_values = [int(dual)]  # 1 or 0
    if multiclass == '':
        # liblinear only supports one-vs-rest
        if 'liblinear' not in solver_values:
            multiclass_values = ['ovr', 'multinomial']
        else:
            multiclass_values = ['ovr']
    else:
        multiclass_values = [multiclass]
    grid_values = [c_values, solver_values, penalty_values, dual_values, multiclass_values]
    max_iterations = int(max_iterations)
    if all(len(x) == 1 for x in grid_values):  # only single parameter settings: no search needed
        settings = {}
        for i, parameter in enumerate(parameters):
            settings[parameter] = grid_values[i][0]
    else:  # try different parameter combinations with a randomized search
        iterations = int(iterations)
        param_grid = {}
        for i, parameter in enumerate(parameters):
            param_grid[parameter] = grid_values[i]
        model = LogisticRegression(max_iter=max_iterations)
        # if multi:
        #     model = OutputCodeClassifier(model)
        paramsearch = RandomizedSearchCV(model, param_grid, cv=5, verbose=2, n_iter=iterations, n_jobs=10, pre_dispatch=4)
        paramsearch.fit(trainvectors, self.label_encoder.transform(labels))
        settings = paramsearch.best_params_
    # train a logistic regression classifier with the settings that led to the best performance
    self.model = LogisticRegression(
        C=settings[parameters[0]],
        solver=settings[parameters[1]],
        penalty=settings[parameters[2]],
        dual=settings[parameters[3]],
        multi_class=settings[parameters[4]],
        max_iter=max_iterations,
        verbose=2
    )
    # if multi:
    #     self.model = OutputCodeClassifier(self.model)
    self.model.fit(trainvectors, self.label_encoder.transform(labels))
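
For reference, below is a minimal, self-contained sketch of the same randomized-search step on synthetic data. The variable names and the reduced parameter grid are illustrative only and not part of the class above; the deprecated multi_class option is left out, and the solver is fixed to liblinear so every sampled combination is valid.

# standalone sketch, assuming scikit-learn with sklearn.model_selection available
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import RandomizedSearchCV
from sklearn.preprocessing import LabelEncoder

X, y = make_classification(n_samples=200, n_features=20, random_state=1)
label_encoder = LabelEncoder().fit(y)

# reduced grid; restricting the solver keeps all sampled combinations valid
param_grid = {
    'C': [0.01, 0.1, 1, 10, 100],
    'solver': ['liblinear'],
    'penalty': ['l1', 'l2'],
    'dual': [False],
}
search = RandomizedSearchCV(LogisticRegression(max_iter=1000), param_grid,
                            cv=5, n_iter=10, random_state=1)
search.fit(X, label_encoder.transform(y))

# refit a fresh model with the best settings, as the method above does
best_model = LogisticRegression(max_iter=1000, **search.best_params_)
best_model.fit(X, label_encoder.transform(y))
print(search.best_params_)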