Example source code for Python's LogisticRegression() class

test_feature_importances.py (project: triage, author: dssg)
def trained_models():
    dataset = datasets.load_breast_cancer()
    X = dataset.data
    y = dataset.target

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=12345)

    rf = RandomForestClassifier()
    rf.fit(X_train, y_train)

    lr = LogisticRegression()
    lr.fit(X_train, y_train)

    svc_w_linear_kernel = SVC(kernel='linear')
    svc_w_linear_kernel.fit(X_train, y_train)

    svc_wo_linear_kernel = SVC()
    svc_wo_linear_kernel.fit(X_train, y_train)

    dummy = DummyClassifier()
    dummy.fit(X_train, y_train)

    return {'RF':rf, 'LR':lr, 'SVC_w_linear_kernel':svc_w_linear_kernel,
            'Dummy':dummy, 'SVC_wo_linear_kernel':svc_wo_linear_kernel}
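As excerpted, the fixture omits its imports; a typical scikit-learn setup would need roughly the following (an assumption, since the original file header is not shown):

from sklearn import datasets
from sklearn.dummy import DummyClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC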
models.py (project: johnson-county-ddj-public, author: dssg)
def get_feature_importance(self, clf, model_name):
        clfs = {'RandomForestClassifier':'feature_importances',
                'ExtraTreesClassifier': 'feature_importances',
                'AdaBoostClassifier': 'feature_importances',
                'LogisticRegression': 'coef',
                'svm.SVC': 'coef',
                'GradientBoostingClassifier': 'feature_importances',
                'GaussianNB': None,
                'DecisionTreeClassifier': 'feature_importances',
                'SGDClassifier': 'coef',
                'KNeighborsClassifier': None,
                'linear.SVC': 'coef'}

        if clfs[model_name] == 'feature_importances':
            return list(clf.feature_importances_)
        elif clfs[model_name] == 'coef':
            return clf.coef_.tolist()
        else:
            return None
ClassificationLogReg.py (project: AirTicketPredicting, author: junlulocky)
def parameterChoosing(self):
        # Set the parameters by cross-validation
        tuned_parameters = [{'penalty': ['l1'],
                             'C': np.logspace(-5, 5)},
                            {'penalty': ['l2'],
                             'C': np.logspace(-5, 5)}]

        clf = GridSearchCV(linear_model.LogisticRegression(tol=1e-6), tuned_parameters, cv=5, scoring='precision_weighted')
        clf.fit(self.X_train, self.y_train.ravel())

        print "Best parameters set found on development set:\n"
        print clf.best_params_

        print "Grid scores on development set:\n"
        for params, mean_score, scores in clf.grid_scores_:
            print "%0.3f (+/-%0.03f) for %r\n" % (mean_score, scores.std() * 2, params)

        print "Detailed classification report:\n"
        y_true, y_pred = self.y_test, clf.predict(self.X_test)
        print classification_report(y_true, y_pred)
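Note: grid_scores_ was deprecated in scikit-learn 0.18 and removed in 0.20. On current versions the same per-candidate report can be derived from cv_results_, roughly as follows (a sketch, not part of the original project):

means = clf.cv_results_['mean_test_score']
stds = clf.cv_results_['std_test_score']
for mean, std, params in zip(means, stds, clf.cv_results_['params']):
    print("%0.3f (+/-%0.03f) for %r" % (mean, std * 2, params))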
models.py (project: johnson-county-ddj-public, author: dssg)
def define_model(self, model, parameters, n_cores=0):
        clfs = {'RandomForestClassifier': RandomForestClassifier(n_estimators=50, n_jobs=7),
                'ExtraTreesClassifier': ExtraTreesClassifier(n_estimators=10, n_jobs=7, criterion='entropy'),
                'AdaBoostClassifier': AdaBoostClassifier(DecisionTreeClassifier(max_depth=1), algorithm="SAMME", n_estimators=200),
                'LogisticRegression': LogisticRegression(penalty='l1', C=1e5),
                'svm.SVC': svm.SVC(kernel='linear', probability=True, random_state=0),
                'GradientBoostingClassifier': GradientBoostingClassifier(learning_rate=0.05, subsample=0.5, max_depth=6, n_estimators=10),
                'GaussianNB': GaussianNB(),
                'DecisionTreeClassifier': DecisionTreeClassifier(),
                'SGDClassifier': SGDClassifier(loss="hinge", penalty="l2", n_jobs=7),
                'KNeighborsClassifier': KNeighborsClassifier(n_neighbors=3), 
                'linear.SVC': svm.LinearSVC() }

        if model not in clfs:
            raise ConfigError("Unsupported model {}".format(model))

        clf = clfs[model]
        clf.set_params(**parameters)
        return clf
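Given the mapping above, a call from within the class would look like this (parameter values hypothetical):

clf = self.define_model('LogisticRegression', {'C': 1.0, 'penalty': 'l2'})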
analyze.py (project: visually-grounded-speech, author: gchrupala)
def test_homonym(H, sent, features, C=1.0):
    X_0 = features(matching(sent, H[0]))
    X_1 = features(matching(sent, H[1]))
    y_0 = numpy.zeros(len(X_0))
    y_1 = numpy.ones(len(X_1))
    X = normalize(numpy.vstack([X_0, X_1]), norm='l2')
    y = numpy.hstack([y_0, y_1])
    classifier = LogisticRegression(C=C)
    fold = StratifiedKFold(y, n_folds=10)
    score = []
    count = []
    for tr, te in fold:
        X_tr, X_te = X[tr], X[te]
        y_tr, y_te = y[tr], y[te]
        classifier.fit(X_tr, y_tr)
        score.append(sum(classifier.predict(X_te) == y_te))
        count.append(len(y_te))
    score = numpy.array(score, dtype='float')
    count = numpy.array(count, dtype='float')
    result = {'word1_count': len(y_0),
              'word2_count': len(y_1),
              'majority': 1.0 * max(len(y_0),len(y_1))/len(y),
              'kfold_acc': score/count }
    return result
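Note: StratifiedKFold(y, n_folds=10) is the pre-0.18 scikit-learn API. On current versions the equivalent split is obtained like this (a sketch; the loop body is unchanged):

from sklearn.model_selection import StratifiedKFold

for tr, te in StratifiedKFold(n_splits=10).split(X, y):
    ...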
resnet_regressor.py (project: Brain_Tumor_Segmentation, author: KarthikRevanuru)
def train_xgboost():
    df = pd.read_csv('survival_data.csv', index_col=0, encoding = 'UTF-7')
    p = np.array([np.mean(np.load('training/%s_flair.nii.gz.npy' % str(id)), axis=0) for id in folder_names_train])
    q = np.array([np.mean(np.load('training/%s_t1.nii.gz.npy' % str(id)), axis=0) for id in folder_names_train])
    r = np.array([np.mean(np.load('training/%s_t1ce.nii.gz.npy' % str(id)), axis=0) for id in folder_names_train])
    s = np.array([np.mean(np.load('training/%s_t2.nii.gz.npy' % str(id)), axis=0) for id in folder_names_train])

    y=np.array([])
    t=0
    z=np.array([])
    for ind in range(len(folder_names_train)):
        try:
            temp = df.get_value(str(folder_names_train[ind]),'Survival')
            y=np.append(y,temp)
            temp = df.get_value(str(folder_names_train[ind]),'Age')
            z=np.append(z,np.array([temp]))
        except Exception as e:
            t+=1 
            print (t,str(e),"Label Not found, deleting entry")
            y=np.append(y,0)

    z=np.array([[v] for v in z])

    t=np.concatenate((p,q),axis=1)
    u=np.concatenate((r,s),axis=1)
    x=np.concatenate((t,u),axis=1) 
    #print(x.shape)
    #print (x)
    #print (x.shape,z.shape)
    x=np.concatenate((x,z),axis=1)
    #print (x)
    #clf=linear_model.LogisticRegression(C=1e5)
    #clf = RandomForestRegressor()
    clf = xgb.XGBRegressor()
    clf.fit(x,y)
    return clf
models_classification.py (project: easyML, author: aarshayj)
def __init__(
        self,data_block, predictors=[],cv_folds=10,
        scoring_metric='accuracy',additional_display_metrics=[]):

        base_classification.__init__(
            self, alg=LogisticRegression(), data_block=data_block, 
            predictors=predictors,cv_folds=cv_folds,
            scoring_metric=scoring_metric, 
            additional_display_metrics=additional_display_metrics
            )

        self.model_output=pd.Series(self.default_parameters)
        self.model_output['Coefficients'] = "-"

        #Set parameters to default values:
        self.set_parameters(set_default=True)
test_optimizer_base.py (project: OptML, author: johannespetrat)
def test_model_detection(self):
        sklearn_model = LogisticRegression()
        pipeline_model = Pipeline([('log', sklearn_model)])
        xgb_model = XGBClassifier()
        nn_model = NNModel(100,10)
        sklearn_opt = Optimizer(sklearn_model,[], lambda x: x)
        pipeline_opt = Optimizer(pipeline_model,[], lambda x: x)
        xgb_opt = Optimizer(xgb_model,[], lambda x: x)
        nn_opt = Optimizer(nn_model,[], lambda x: x)

        self.assertEqual(sklearn_opt.model_module, 'sklearn')
        self.assertEqual(pipeline_opt.model_module, 'pipeline')
        self.assertEqual(xgb_opt.model_module, 'xgboost')
        self.assertEqual(nn_opt.model_module, 'keras')
sentiment.py (project: Twitter-and-IMDB-Sentimental-Analytics, author: abhinandanramesh)
def build_models_NLP(train_pos_vec, train_neg_vec):
    """
    Returns a BernoulliNB and a LogisticRegression model that are fit to the training data.
    """
    Y = ["pos"]*len(train_pos_vec) + ["neg"]*len(train_neg_vec)

    # Use sklearn's BernoulliNB and LogisticRegression functions to fit two models to the training data.
    # For BernoulliNB, use alpha=1.0 and binarize=None
    # For LogisticRegression, pass no parameters
    train_vec = []
    train_vec.extend(train_pos_vec)
    train_vec.extend(train_neg_vec)

    nb_model = BernoulliNB(alpha=1.0, binarize=None, class_prior=None, fit_prior=True)
    nb_model.fit(train_vec, Y)

    lr_model = LogisticRegression()
    lr_model.fit(train_vec, Y)

    return nb_model, lr_model
sentiment.py (project: Twitter-and-IMDB-Sentimental-Analytics, author: abhinandanramesh)
def build_models_DOC(train_pos_vec, train_neg_vec):
    """
    Returns a GaussianNB and a LogisticRegression model that are fit to the training data.
    """
    Y = ["pos"]*len(train_pos_vec) + ["neg"]*len(train_neg_vec)

    # Use sklearn's GaussianNB and LogisticRegression functions to fit two models to the training data.
    # For LogisticRegression, pass no parameters
    train_vec = []
    train_vec.extend(train_pos_vec)
    train_vec.extend(train_neg_vec)

    nb_model = GaussianNB()
    nb_model.fit(train_vec, Y)

    lr_model = LogisticRegression()
    lr_model.fit(train_vec, Y)

    return nb_model, lr_model
learn.py (project: XTREE, author: ai-se)
def learns(tests,trains,indep=lambda x: x[:-1],
                    dep = lambda x: x[-1],
                    rf  = Abcd(),
                    lg  = Abcd(),
                    dt  = Abcd(),
                    nb  = Abcd()):
  x1,y1,x2,y2= trainTest(tests,trains,indep,dep) 
  forest = RandomForestClassifier(n_estimators = 50)  
  forest = forest.fit(x1,y1)
  for n,got in enumerate(forest.predict(x2)):
    rf(predicted = got, actual = y2[n])
  logreg = linear_model.LogisticRegression(C=1e5)
  logreg.fit(x1, y1)
  for n,got in enumerate(logreg.predict(x2)):
    lg(predicted = got, actual = y2[n])
  bayes =  GaussianNB()
  bayes.fit(x1,y1)
  for n,got in enumerate(bayes.predict(x2)):
    nb(predicted = got, actual = y2[n])
  dectree = DecisionTreeClassifier(criterion="entropy",
                         random_state=1)
  dectree.fit(x1,y1)
  for n,got in enumerate(dectree.predict(x2)):
    dt(predicted = got, actual = y2[n])
test_estimators.py (project: triage, author: dssg)
def test_dsapp_lr(data):
    dsapp_lr = ScaledLogisticRegression()
    dsapp_lr.fit(data['X_train'], data['y_train'])

    minmax_scaler = preprocessing.MinMaxScaler()
    dsapp_cutoff = CutOff()
    lr = linear_model.LogisticRegression()

    pipeline = Pipeline([
        ('minmax_scaler', minmax_scaler),
        ('dsapp_cutoff', dsapp_cutoff),
        ('lr', lr)
    ])

    pipeline.fit(data['X_train'], data['y_train'])

    assert np.all(dsapp_lr.predict(data['X_test']) == pipeline.predict(data['X_test']))
metrics.py (project: SteinGAN, author: DartML)
def cv_reg_lr(trX, trY, vaX, vaY, Cs=[0.01, 0.05, 0.1, 0.5, 1., 5., 10., 50., 100.]):
    tr_accs = []
    va_accs = []
    models = []
    for C in Cs:
        model = LR(C=C)
        model.fit(trX, trY)
        tr_pred = model.predict(trX)
        va_pred = model.predict(vaX)
        tr_acc = metrics.accuracy_score(trY, tr_pred)
        va_acc = metrics.accuracy_score(vaY, va_pred)
        print('%.4f %.4f %.4f' % (C, tr_acc, va_acc))
        tr_accs.append(tr_acc)
        va_accs.append(va_acc)
        models.append(model)
    best = np.argmax(va_accs)
    print('best model C: %.4f tr_acc: %.4f va_acc: %.4f' % (Cs[best], tr_accs[best], va_accs[best]))
    return models[best]
logistic_regression.py (project: MultimodalAutoencoder, author: natashamjaques)
def train_and_predict(self, param_dict, predict_on='val'):
        """Initializes a LR classifier according to the desired parameter settings, 
        trains it, and returns the predictions on the appropriate evaluation dataset.

        Args:
            param_dict: A dictionary with keys representing parameter names and 
                values representing settings for those parameters.
            predict_on: The dataset used for evaluating the model. Can be set to
                'test' to get final results.

        Returns: The predicted Y labels.
        """
        if predict_on == 'test':
            predict_X = self.data_loader.test_X
        else:
            predict_X = self.data_loader.val_X

        self.model = linear_model.LogisticRegression(penalty=param_dict['penalty'], 
                                                C=param_dict['C'])
        self.model.fit(self.data_loader.train_X, self.data_loader.train_Y)
        preds = self.predict_on_data(predict_X)

        return preds
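A call might look like the following (the wrapper instance and parameter values are hypothetical):

param_dict = {'penalty': 'l2', 'C': 1.0}
preds = lr_wrapper.train_and_predict(param_dict, predict_on='val')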
test_searchgrid.py (project: searchgrid, author: jnothman)
def test_build_param_grid_set_estimator():
    clf1 = SVC()
    clf2 = LogisticRegression()
    clf3 = SVC()
    clf4 = SGDClassifier()
    estimator = set_grid(Pipeline([('sel', set_grid(SelectKBest(), k=[2, 3])),
                                   ('clf', None)]),
                         clf=[set_grid(clf1, kernel=['linear']),
                              clf2,
                              set_grid(clf3, kernel=['poly'], degree=[2, 3]),
                              clf4])
    param_grid = [{'clf': [clf1], 'clf__kernel': ['linear'], 'sel__k': [2, 3]},
                  {'clf': [clf3], 'clf__kernel': ['poly'],
                   'clf__degree': [2, 3], 'sel__k': [2, 3]},
                  {'clf': [clf2, clf4], 'sel__k': [2, 3]}]
    assert build_param_grid(estimator) == param_grid
test_searchgrid.py (project: searchgrid, author: jnothman)
def test_make_grid_search():
    X, y = load_iris(return_X_y=True)
    lr = LogisticRegression()
    svc = set_grid(SVC(), kernel=['poly'], degree=[2, 3])
    gs1 = make_grid_search(lr, cv=5)  # empty grid
    gs2 = make_grid_search(svc, cv=5)
    gs3 = make_grid_search([lr, svc], cv=5)
    for gs, n_results in [(gs1, 1), (gs2, 2), (gs3, 3)]:
        gs.fit(X, y)
        assert gs.cv == 5
        assert len(gs.cv_results_['params']) == n_results

    svc_mask = gs3.cv_results_['param_root'] == svc
    assert svc_mask.sum() == 2
    assert gs3.cv_results_['param_root__degree'][svc_mask].tolist() == [2, 3]
    assert gs3.cv_results_['param_root'][~svc_mask].tolist() == [lr]
_logistic_regression.py (project: coremltools, author: gsabran)
def convert(model, feature_names, target):
    """Convert a Logistic Regression model to the protobuf spec.
    Parameters
    ----------
    model: LogisticRegression
        A trained LogisticRegression model.

    feature_names: [str], optional (default=None)
        Name of the input columns.

    target: str, optional (default=None)
        Name of the output column.

    Returns
    -------
    model_spec: An object of type Model_pb.
        Protobuf representation of the model
    """
    if not _HAS_SKLEARN:
        raise RuntimeError('scikit-learn not found. scikit-learn conversion API is disabled.')

    _sklearn_util.check_expected_type(model, LogisticRegression)
    _sklearn_util.check_fitted(model, lambda m: hasattr(m, 'coef_'))

    return _MLModel(_convert(model, feature_names, target))
test.py (project: stacked_generalization, author: fukatani)
def test_stacked_classfier_extkfold(self):
        bclf = LogisticRegression(random_state=1)
        clfs = [RandomForestClassifier(n_estimators=40, criterion = 'gini', random_state=1),
                RidgeClassifier(random_state=1),
                ]
        sl = StackedClassifier(bclf,
                               clfs,
                               n_folds=3,
                               verbose=0,
                               Kfold=StratifiedKFold(self.iris.target, 3),
                               stack_by_proba=False,
                               oob_score_flag=True,
                               oob_metrics=log_loss)
        sl.fit(self.iris.data, self.iris.target)
        score = sl.score(self.iris.data, self.iris.target)
        self.assertGreater(score, 0.9, "Failed with score = {0}".format(score))
test.py (project: stacked_generalization, author: fukatani)
def test_fwls_classfier(self):
        feature_func = lambda x: np.ones(x.shape)
        bclf = LogisticRegression(random_state=1)
        clfs = [RandomForestClassifier(n_estimators=40, criterion = 'gini', random_state=1),
                RidgeClassifier(random_state=1),
                ]
        sl = FWLSClassifier(bclf,
                            clfs,
                            feature_func=feature_func,
                            n_folds=3,
                            verbose=0,
                            Kfold=StratifiedKFold(self.iris.target, 3),
                            stack_by_proba=False)
        sl.fit(self.iris.data, self.iris.target)
        score = sl.score(self.iris.data, self.iris.target)
        self.assertGreater(score, 0.9, "Failed with score = {0}".format(score))
exp_utils.py (project: gcForest, author: kingfengji)
def prec_log(X_train, y_train, X_test, y_test):
    from sklearn.linear_model import LogisticRegression
    if not issparse(X_train):
        X_train = X_train.reshape((X_train.shape[0], -1))
    if not issparse(X_test):
        X_test = X_test.reshape((X_test.shape[0], -1))
    LOGGER.info('start predict: X_train.shape={},y_train.shape={},X_test.shape={},y_test.shape={}'.format(
        X_train.shape, y_train.shape, X_test.shape, y_test.shape))
    clf = LogisticRegression(solver='sag', n_jobs=-1, verbose=1)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    prec = float(np.sum(y_pred == y_test)) / len(y_test)
    LOGGER.info('prec_log={:.6f}%'.format(prec*100.0))
    return clf, y_pred
distances.py (project: abcpy, author: eth-cscs)
def distance(self, d1, d2):
        # Extract summary statistics from the dataset
        s1 = self.statistics_calc.statistics(d1)
        s2 = self.statistics_calc.statistics(d2)

        # compute the distance between the statistics
        training_set_features = np.concatenate((s1, s2), axis=0)
        label_s1 = np.zeros(shape=(len(s1), 1))
        label_s2 = np.ones(shape=(len(s2), 1))
        training_set_labels = np.concatenate((label_s1, label_s2), axis=0).ravel()

        reg_inv = 1e5
        log_reg_model = linear_model.LogisticRegression(C=reg_inv, penalty='l1')
        log_reg_model.fit(training_set_features, training_set_labels)
        score = log_reg_model.score(training_set_features, training_set_labels)
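        # An accuracy near 0.5 means the classifier cannot tell s1 from s2,
        # so the distance below is close to 0; perfect separation yields 1.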
        distance = 2.0 * (score - 0.5)

        return distance
a2.py (project: assignments, author: iit-cs579)
def cross_validation_accuracy(clf, X, labels, k):
    """
    Compute the average testing accuracy over k folds of cross-validation. You
    can use sklearn's KFold class here (no random seed, and no shuffling
    needed).

    Params:
      clf......A LogisticRegression classifier.
      X........A csr_matrix of features.
      labels...The true labels for each instance in X
      k........The number of cross-validation folds.

    Returns:
      The average testing accuracy of the classifier
      over each fold of cross-validation.
    """
    ###TODO
    pass
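The body is left as a TODO in the original assignment; a minimal sketch that satisfies the docstring (no shuffling, no random seed) might be:

from sklearn.model_selection import KFold
from sklearn.metrics import accuracy_score
import numpy as np

def cross_validation_accuracy(clf, X, labels, k):
    labels = np.asarray(labels)
    accuracies = []
    for train_idx, test_idx in KFold(n_splits=k).split(X):
        clf.fit(X[train_idx], labels[train_idx])
        accuracies.append(accuracy_score(labels[test_idx], clf.predict(X[test_idx])))
    return np.mean(accuracies)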
a2.py (project: assignments, author: iit-cs579)
def fit_best_classifier(docs, labels, best_result):
    """
    Using the best setting from eval_all_combinations,
    re-vectorize all the training data and fit a
    LogisticRegression classifier to all training data.
    (i.e., no cross-validation done here)

    Params:
      docs..........List of training document strings.
      labels........The true labels for each training document (0 or 1)
      best_result...Element of eval_all_combinations
                    with highest accuracy
    Returns:
      clf.....A LogisticRegression classifier fit to all
            training data.
      vocab...The dict from feature name to column index.
    """
    ###TODO
    pass
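Also a TODO in the original; a sketch, assuming tokenize and vectorize helpers and 'punct'/'features'/'min_freq' keys in best_result (all hypothetical stand-ins for the assignment's own code):

def fit_best_classifier(docs, labels, best_result):
    # tokenize, vectorize and the best_result keys below are hypothetical.
    tokens_list = [tokenize(d, best_result['punct']) for d in docs]
    X, vocab = vectorize(tokens_list, best_result['features'], best_result['min_freq'])
    clf = LogisticRegression()
    clf.fit(X, labels)
    return clf, vocab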
a2.py (project: assignments, author: iit-cs579)
def top_coefs(clf, label, n, vocab):
    """
    Find the n features with the highest coefficients in
    this classifier for this label.
    See the .coef_ attribute of LogisticRegression.

    Params:
      clf.....LogisticRegression classifier
      label...1 or 0; if 1, return the top coefficients
              for the positive class; else for negative.
      n.......The number of coefficients to return.
      vocab...Dict from feature name to column index.
    Returns:
      List of (feature_name, coefficient) tuples, SORTED
      in descending order of the coefficient for the
      given class label.
    """
    ###TODO
    pass
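One way to satisfy the docstring for the binary case (classes 0/1), reading coef_ directly; a sketch:

import numpy as np

def top_coefs(clf, label, n, vocab):
    coef = clf.coef_[0]
    if label == 0:
        coef = -coef  # flip signs so large values favor the negative class
    top_idx = np.argsort(coef)[::-1][:n]
    index_to_feature = {idx: feat for feat, idx in vocab.items()}
    return [(index_to_feature[i], coef[i]) for i in top_idx]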
a2.py (project: assignments, author: iit-cs579)
def print_top_misclassified(test_docs, test_labels, X_test, clf, n):
    """
    Print the n testing documents that are misclassified by the
    largest margin. By using the .predict_proba function of
    LogisticRegression <https://goo.gl/4WXbYA>, we can get the
    predicted probabilities of each class for each instance.
    We will first identify all incorrectly classified documents,
    then sort them in descending order of the predicted probability
    for the incorrect class.
    E.g., if document i is misclassified as positive, we will
    consider the probability of the positive class when sorting.

    Params:
      test_docs.....List of strings, one per test document
      test_labels...Array of true testing labels
      X_test........csr_matrix for test data
      clf...........LogisticRegression classifier fit on all training
                    data.
      n.............The number of documents to print.

    Returns:
      Nothing; see Log.txt for example printed output.
    """
    ###TODO
    pass
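A sketch of one possible implementation, sorting misclassified documents by the probability assigned to the wrong class:

import numpy as np

def print_top_misclassified(test_docs, test_labels, X_test, clf, n):
    test_labels = np.asarray(test_labels)
    predicted = clf.predict(X_test)
    probas = clf.predict_proba(X_test)
    wrong = np.where(predicted != test_labels)[0]
    # probability the model assigned to its own (incorrect) prediction
    confidence = probas[wrong, predicted[wrong].astype(int)]
    for i in wrong[np.argsort(confidence)[::-1]][:n]:
        print('truth=%d predicted=%d proba=%.6f' %
              (test_labels[i], predicted[i], probas[i, int(predicted[i])]))
        print(test_docs[i])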
task73.py (project: nlp100knock, author: ff9900)
def task73(features):

    features = numpy.array(features)
    words = list(set(features[:, 1]))

    pos_vec = numpy.zeros(len(words))
    neg_vec = numpy.zeros(len(words))

    for feature in features:
        index = words.index(feature[1])
        if feature[0] == '-1':
            pos_vec[index] += 1
        else:
            neg_vec[index] += 1

    model = linear_model.LogisticRegression()
    model.fit([pos_vec, neg_vec], [1, -1])

    return (words, model)
experiment.py (project: question-answering, author: emorynlp)
def test_lr_on_data(X_train, y_train, X_validate, y_validate, X_test, y_test):
    y_train_flatten = list(itertools.chain(*y_train))

    # Train LR Model
    lr = LogisticRegression(solver='lbfgs')
    lr.fit(X_train, y_train_flatten)

    # Test model on validation set
    predictions_val = lr.predict_proba(X_validate)
    predictions_val = array([i[-1] for i in predictions_val])
    best_threshold_validate = find_threshold_logistic(y_validate, predictions_val, predictions_val)
    precision_val, recall_val, f1_val = evaluate_with_threshold(y_validate, predictions_val, predictions_val,
                                                                best_threshold_validate)
    globals.logger.info("Found threshold: %f. Precision/recall/f1 over validation set: %f/%f/%f" %
                        (best_threshold_validate, precision_val, recall_val, f1_val))

    # Test model on test set
    predictions_test = lr.predict_proba(X_test)
    predictions_test = array([i[-1] for i in predictions_test])
    best_threshold_test = find_threshold_logistic(y_test, predictions_test, predictions_test, verbose=True)
    precision, recall, f1 = evaluate_with_threshold(y_test, predictions_test, predictions_test, best_threshold_test)
    globals.logger.info("Found threshold: %f. Precision/recall/f1 over test set: %f/%f/%f" %
                        (best_threshold_test, precision, recall, f1))

    return precision, recall, f1
nasality.py (project: AutismVoicePrint, author: opraveen)
def train_using_logistic(feat1, feat2):

    n_plus = len(feat1)
    n_minus = len(feat2)

    X = np.concatenate((feat1, feat2), axis=0)
    y = np.concatenate((np.zeros(n_plus), np.ones(n_minus)), axis=0)
    y = y + 1

    print(X.shape, y.shape, n_plus, n_minus, feat1.shape, feat2.shape)

    logreg = linear_model.LogisticRegression(C=1e5)
    logreg.fit(X, y)

    print("Score using logistic regression on training data is ", logreg.score(X, y))
    return logreg

