def train_classifier(self, trainvectors, labels, c='', kernel='', gamma='', degree='', class_weight='', iterations=10):
    """Train an SVC on the given data and store it in ``self.model``.

    Each hyperparameter is given as a whitespace-separated string of
    candidate values; an empty string selects a built-in default grid.
    When every hyperparameter has exactly one candidate, those values are
    used directly. Otherwise a 5-fold ``RandomizedSearchCV`` picks the best
    combination first, and the final SVC is then fitted with it.

    Args:
        trainvectors: Feature matrix handed to scikit-learn's ``fit``.
        labels: Training labels; encoded with
            ``self.label_encoder.transform`` before fitting.
        c: Candidate values for ``C`` (parsed as floats).
        kernel: Candidate kernel names.
        gamma: Candidate values for ``gamma`` (parsed as floats).
        degree: Candidate values for ``degree`` (parsed as ints).
        class_weight: Forwarded to ``svm.SVC``; an empty string means
            ``'balanced'``.
        iterations: Number of parameter settings sampled by the randomized
            search (converted with ``int``); unused when every
            hyperparameter has a single candidate.

    The method returns nothing; the fitted classifier is stored in
    ``self.model``.
    """
    # With more than two classes the search estimator is an
    # OutputCodeClassifier around the SVC, so the SVC parameters are
    # addressed through the 'estimator__' prefix.
    multi = len(self.label_encoder.classes_) > 2
    prefix = 'estimator__' if multi else ''
    parameters = [prefix + name for name in ('C', 'kernel', 'gamma', 'degree')]

    c_values = [0.001, 0.005, 0.01, 0.5, 1, 5, 10, 50, 100, 500, 1000] if c == '' else [float(x) for x in c.split()]
    kernel_values = ['linear', 'rbf', 'poly'] if kernel == '' else kernel.split()
    gamma_values = [0.0005, 0.002, 0.008, 0.032, 0.128, 0.512, 1.024, 2.048] if gamma == '' else [float(x) for x in gamma.split()]
    degree_values = [1, 2, 3, 4] if degree == '' else [int(x) for x in degree.split()]
    grid_values = [c_values, kernel_values, gamma_values, degree_values]

    # Resolve the default up front so that both the single-setting path and
    # the search path hand a valid class_weight to the final SVC (an empty
    # string is not an accepted value for SVC's class_weight).
    if class_weight == '':
        class_weight = 'balanced'

    # Encode the labels once; both the search and the final fit use them.
    encoded_labels = self.label_encoder.transform(labels)

    if all(len(values) == 1 for values in grid_values):
        # Only single parameter settings: nothing to search.
        settings = {parameter: values[0] for parameter, values in zip(parameters, grid_values)}
    else:
        iterations = int(iterations)
        param_grid = dict(zip(parameters, grid_values))
        model = svm.SVC(probability=True)
        if multi:
            model = OutputCodeClassifier(model)
        paramsearch = RandomizedSearchCV(model, param_grid, cv=5, verbose=2, n_iter=iterations, n_jobs=10, pre_dispatch=4)
        paramsearch.fit(trainvectors, encoded_labels)
        settings = paramsearch.best_params_

    # Train an SVC classifier with the settings that led to the best performance.
    # NOTE: the final model is a plain SVC even in the multi-class case; only
    # the parameter search wraps the SVC in an OutputCodeClassifier.
    self.model = svm.SVC(
        probability=True,
        C=settings[parameters[0]],
        kernel=settings[parameters[1]],
        gamma=settings[parameters[2]],
        degree=settings[parameters[3]],
        class_weight=class_weight,
        cache_size=1000,
        verbose=2,
    )
    self.model.fit(trainvectors, encoded_labels)
评论列表
文章目录