wangbase.py 文件源码-python代码片段

def train(self, a_train_data, a_dev_data=None, a_n_y=-1,
              a_i=-1, a_train_out=None, a_dev_out=None):
        """Train the model and optionally store its predictions.

        Args:
          a_train_data (tuple[list, dict]):
            training JSON data
          a_dev_data (tuple[list, dict] or None):
            development JSON data
          a_n_y (int):
            number of distinct classes
          a_i (int):
            row index for the output predictions (predictions are only
            written if this index is non-negative)
          a_train_out (np.array or None):
            predictions for the training set
          a_dev_out (np.array or None):
            predictions for the development set

        Returns:
          void:

        Note:
          updates ``a_train_out`` and ``a_dev_out`` in place; if grid search
          is enabled, ``self._model`` is replaced by the grid-search wrapper

        """
        self.n_y = a_n_y
        x_train, y_train = self._generate_ts(a_train_data)
        x_dev, y_dev = self._generate_ts(a_dev_data)
        # number of genuine training instances; only set when the dev set gets
        # appended to the training data for grid search
        n_train = None
        # determine cross-validation and grid-search strategy and fit the model
        if self._gs:
            if a_dev_data is None or not a_dev_data[0]:
                cv = StratifiedKFold(y_train, n_folds=NFOLDS, shuffle=True)
            else:
                cv = self._devset_cv(y_train, len(y_dev), NFOLDS)
                # remember the boundary *before* merging, so that the dev
                # instances can be reliably stripped again later
                n_train = len(x_train)
                x_train = x_train + x_dev
                y_train = y_train + y_dev
            scorer = make_scorer(f1_score, average="macro")
            self._model = GridSearchCV(self._model, self.PARAM_GRID,
                                       scoring=scorer,
                                       cv=cv, n_jobs=self.N_JOBS, verbose=1)
        # each instance is a tuple whose last element is the feature input
        self._model.fit([el[-1] for el in x_train], y_train)
        # output best hyper-parameters
        if self._gs:
            print("Best params:", repr(self._model.best_params_),
                  file=sys.stderr)
        # a negative index means that no predictions should be stored
        if a_i < 0:
            return
        if a_train_out is not None:
            if n_train is not None:
                # drop the dev instances which were appended for grid search;
                # slicing by the stored length (instead of `[:-len(x_dev)]`)
                # also works when `x_dev` turns out to be empty
                x_train = x_train[:n_train]
            for i, x_i in x_train:
                self._predict(x_i, a_train_out[i], a_i)
        if a_dev_out is not None:
            for i, x_i in x_dev:
                self._predict(x_i, a_dev_out[i], a_i)