classifier.py 文件源码

python
阅读 24 收藏 0 点赞 0 评论 0

项目:quoll 作者: LanguageMachines 项目源码 文件源码
def train_classifier(self, trainvectors, labels, c='', solver='', dual='', penalty='', multiclass='', max_iterations=1000, iterations=10):
        """Fit a logistic regression classifier and store it in ``self.model``.

        Every hyperparameter left as ``''`` gets a list of candidate values.
        If each hyperparameter ends up with exactly one candidate, those
        values are used directly; otherwise a randomized 5-fold
        cross-validated search picks the combination used for the final fit.

        Args:
            trainvectors: Training instances, passed unchanged to ``fit``.
            labels: Training labels; encoded with ``self.label_encoder``
                before fitting.
            c: Whitespace-separated candidate values for ``C``, or ``''``
                for the built-in range.
            solver: Whitespace-separated solver names, or ``''`` for
                newton-cg, lbfgs, liblinear and sag.
            dual: ``'1'``/``'0'`` (or ``1``/``0``) to force the dual setting,
                or ``''`` to derive the candidates from solver and penalty.
            penalty: A single penalty name, or ``''`` to derive the
                candidates from the solvers.
            multiclass: A single ``multi_class`` value, or ``''`` to derive
                the candidates from the solvers.
            max_iterations: ``max_iter`` for the search estimator and for
                the final model.
            iterations: Number of parameter combinations sampled by the
                randomized search.
        """
        # The estimator that is tuned and fitted below is always a bare
        # LogisticRegression (no meta-estimator wraps it), so the plain
        # parameter names apply regardless of the number of classes;
        # 'estimator__'-prefixed names would not be valid parameters of it.
        parameters = ['C', 'solver', 'penalty', 'dual', 'multi_class']

        if c == '':
            c_values = [0.001, 0.005, 0.01, 0.5, 1, 5, 10, 50, 100, 500, 1000]
        else:
            c_values = [float(x) for x in c.split()]

        if solver == '':
            solver_values = ['newton-cg', 'lbfgs', 'liblinear', 'sag']
        else:
            solver_values = solver.split()

        if penalty != '':
            penalty_values = [penalty]
        elif {'newton-cg', 'lbfgs', 'sag'} & set(solver_values):
            # offer only 'l2' as soon as one of these solvers is a candidate
            penalty_values = ['l2']
        else:
            penalty_values = ['l1', 'l2']

        if dual != '':
            dual_values = [bool(int(dual))]  # '1'/'0' -> True/False
        elif solver_values == ['liblinear'] and penalty_values == ['l2']:
            dual_values = [True, False]
        else:
            # Also covers liblinear with a penalty list other than ['l2'],
            # which previously left dual_values unassigned.
            dual_values = [False]

        if multiclass != '':
            multiclass_values = [multiclass]
        elif 'liblinear' in solver_values:
            multiclass_values = ['ovr']
        else:
            multiclass_values = ['ovr', 'multinomial']

        grid_values = [c_values, solver_values, penalty_values, dual_values, multiclass_values]
        max_iterations = int(max_iterations)
        encoded_labels = self.label_encoder.transform(labels)

        if all(len(values) == 1 for values in grid_values):  # only single parameter settings
            settings = {name: values[0] for name, values in zip(parameters, grid_values)}
        else:  # try different parameter combinations
            param_grid = dict(zip(parameters, grid_values))
            paramsearch = RandomizedSearchCV(
                LogisticRegression(max_iter=max_iterations),
                param_grid,
                cv=5,
                verbose=2,
                n_iter=int(iterations),
                n_jobs=10,
                pre_dispatch=4,
            )
            paramsearch.fit(trainvectors, encoded_labels)
            settings = paramsearch.best_params_

        # train a logistic regression classifier with the settings that led to the best performance
        self.model = LogisticRegression(
            C=settings['C'],
            solver=settings['solver'],
            penalty=settings['penalty'],
            dual=settings['dual'],
            multi_class=settings['multi_class'],
            max_iter=max_iterations,
            verbose=2,
        )
        self.model.fit(trainvectors, encoded_labels)
评论列表
文章目录


问题


面经


文章

微信
公众号

扫码关注公众号