Example source code for Python's LogisticRegression() class
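The snippets below are collected from a number of open-source projects and show sklearn.linear_model.LogisticRegression used in different contexts. As a point of reference, here is a minimal, self-contained sketch on toy data (the numbers are made up purely for illustration):

import numpy as np
from sklearn.linear_model import LogisticRegression

# Toy data: 6 samples, 2 features, binary labels (illustrative only)
X = np.array([[0.0, 1.0], [1.0, 1.0], [2.0, 0.5],
              [3.0, 3.5], [4.0, 3.0], [5.0, 4.0]])
y = np.array([0, 0, 0, 1, 1, 1])

clf = LogisticRegression(C=1.0, solver='liblinear')
clf.fit(X, y)
print(clf.predict([[1.5, 1.0]]))        # predicted class for a new sample
print(clf.predict_proba([[1.5, 1.0]]))  # class probabilities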

LR_solver.py (project: tpai_comp, author: luuuyi)
def generate_LR_model(file_name):
    train_df = read_from_file(file_name)
    selected_train_df = train_df.filter(regex='label|connectionType_.*|telecomsOperator_.*|sitesetID_.*|positionType_.*|gender_.*|haveBaby_.*|age_scaled')
    train_np = selected_train_df.values  # as_matrix() was removed from pandas; .values is equivalent
    y = train_np[:, 0]   # first column is the label
    X = train_np[:, 1:]  # remaining columns are the features
    print('Train Logistic Regression Model...')
    start_time = datetime.datetime.now()
    clf = linear_model.LogisticRegression(penalty='l2', C=1.0, solver='sag', n_jobs=-1, tol=1e-6, max_iter=200)  # , class_weight='balanced')
    clf.fit(X, y)
    end_time = datetime.datetime.now()
    print('Training Done..., Time Cost:')
    print((end_time - start_time).seconds)

    print('Save Model...')
    joblib.dump(clf, 'LR.model')
    return clf
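To reuse the dumped model later, it can be loaded back with joblib; a minimal sketch, assuming the 'LR.model' path written above:

import joblib  # on older scikit-learn versions: from sklearn.externals import joblib

clf = joblib.load('LR.model')  # restores the estimator saved by generate_LR_model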

utils.py (project: magic, author: pan-webis-de)
def get_classifier(method='logistic_regression'):
    if 'logistic_regression' == method:
        return LogisticRegression(C=1e3,
                                  tol=0.01,
                                  multi_class='ovr',
                                  solver='liblinear',
                                  n_jobs=-1,
                                  random_state=123)
    if 'random_forest' == method:
        return RandomForestClassifier(n_estimators=250,
                                      bootstrap=False,
                                      n_jobs=-1,
                                      random_state=123)

    if 'gradient_boosting' == method:
        return xgb.XGBClassifier(max_depth=10,
                                 subsample=0.7,
                                 n_estimators=500,
                                 min_child_weight=0.05,
                                 colsample_bytree=0.3,
                                 learning_rate=0.1)

AIserver.py (project: Using-machine-learning-to-detect-malicious-URLs, author: faizann24)
def TL():
    allurls = './data/data.csv'  # path to our all-urls file
    allurlscsv = pd.read_csv(allurls, sep=',', error_bad_lines=False)  # reading file; newer pandas uses on_bad_lines='skip'
    allurlsdata = pd.DataFrame(allurlscsv)  # converting to a dataframe

    allurlsdata = np.array(allurlsdata) #converting it into an array
    random.shuffle(allurlsdata) #shuffling

    y = [d[1] for d in allurlsdata] #all labels 
    corpus = [d[0] for d in allurlsdata]    #all urls corresponding to a label (either good or bad)
    vectorizer = TfidfVectorizer(tokenizer=getTokens)   #get a vector for each url but use our customized tokenizer
    X = vectorizer.fit_transform(corpus)    #get the X vector

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)   #split into training and testing set 80/20 ratio

    lgs = LogisticRegression()  #using logistic regression
    lgs.fit(X_train, y_train)
    print(lgs.score(X_test, y_test))    # print the score; it comes out to be ~98%
    return vectorizer, lgs
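A hypothetical way to score a new URL with the objects returned by TL(); the URL string below is made up for illustration:

vectorizer, lgs = TL()
X_new = vectorizer.transform(['example.com/index.php?id=1'])
print(lgs.predict(X_new))  # a label such as 'good' or 'bad', depending on the training data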

tests.py (project: scikit-mdr, author: EpistasisLab)
def test_mdr_sklearn_pipeline():
    """Ensure that MDR can be used as a transformer in a scikit-learn pipeline"""
    features = np.array([[2,    0],
                         [0,    0],
                         [0,    1],
                         [0,    0],
                         [0,    0],
                         [0,    0],
                         [0,    1],
                         [0,    0],
                         [0,    0],
                         [0,    1],
                         [0,    0],
                         [0,    0],
                         [0,    0],
                         [1,    1],
                         [1,    1]])

    classes = np.array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0])
    clf = make_pipeline(MDR(), LogisticRegression())
    cv_scores = cross_val_score(clf, features, classes, cv=StratifiedKFold(n_splits=5, shuffle=True))
    assert np.mean(cv_scores) > 0.

tests.py (project: scikit-mdr, author: EpistasisLab)
def test_mdr_sklearn_pipeline_parallel():
    """Ensure that MDR can be used as a transformer in a parallelized scikit-learn pipeline"""
    features = np.array([[2,    0],
                         [0,    0],
                         [0,    1],
                         [0,    0],
                         [0,    0],
                         [0,    0],
                         [0,    1],
                         [0,    0],
                         [0,    0],
                         [0,    1],
                         [0,    0],
                         [0,    0],
                         [0,    0],
                         [1,    1],
                         [1,    1]])

    classes = np.array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0])
    clf = make_pipeline(MDR(), LogisticRegression())
    cv_scores = cross_val_score(clf, features, classes, cv=StratifiedKFold(n_splits=5, shuffle=True), n_jobs=-1)
    assert np.mean(cv_scores) > 0.

behaviorView.py (project: OpinionSpam, author: Coder-Yu)
def fitAndPredict(self):
        # classifier = LogisticRegression()
        # classifier.fit(self.trainingSet, self.trainingLabel)
        # pred_labels = classifier.predict(self.testSet)
        # print 'Logistic:'
        # print classification_report(self.testLabel, pred_labels)

        self.classifier = SVC()
        self.classifier.fit(self.trainingSet, self.trainingLabel)
        pred_labels = {}
        for user in self.testDict:
            pred_labels[user] = self.classifier.predict([[self.BDS[user]]])
        # print 'SVM:'
        # print classification_report(self.testLabel, pred_labels)

        # classifier = DecisionTreeClassifier(criterion='entropy')
        # classifier.fit(self.trainingSet, self.trainingLabel)
        # pred_labels = classifier.predict(self.testSet)
        # print 'Decision Tree:'
        # print classification_report(self.testLabel, pred_labels)
        # return self.trainingSet, self.trainingLabel, self.testSet, self.testLabel

        return pred_labels

unigram.py (project: OpinionSpam, author: Coder-Yu)
def fitAndPredict(self):
        corpus = self.trainingSet+self.testSet
        dictionary = corpora.Dictionary(corpus)

        corpus = [dictionary.doc2bow(text) for text in corpus]
        text_matrix = gensim.matutils.corpus2dense(corpus, num_terms=len(dictionary.token2id)).T

        if PCA_Applied:
            pca = PCA(n_components=PCA_nComponents)
            text_matrix = pca.fit_transform(text_matrix)

        classifier = LogisticRegression()
        classifier.fit(text_matrix[0:len(self.trainingSet)], self.trainingLabel)
        pred_labels = classifier.predict(text_matrix[len(self.trainingSet):])
        print('Logistic:')
        print(classification_report(self.testLabel, pred_labels))

        classifier = SVC()
        classifier.fit(text_matrix[0:len(self.trainingSet)], self.trainingLabel)
        pred_labels = classifier.predict(text_matrix[len(self.trainingSet):])
        print('SVM:')
        print(classification_report(self.testLabel, pred_labels))

TF_IDF.py (project: OpinionSpam, author: Coder-Yu)
def fitAndPredict(self):
        corpus = self.trainingSet+self.testSet
        dictionary = corpora.Dictionary(corpus)
        corpus = [dictionary.doc2bow(text) for text in corpus]
        model = models.TfidfModel(corpus)
        corpus = [text for text in model[corpus]]
        text_matrix = gensim.matutils.corpus2dense(corpus, num_terms=len(dictionary.token2id)).T

        if PCA_Applied:
            pca = PCA(n_components=PCA_nComponents)
            text_matrix = pca.fit_transform(text_matrix)

        classifier = LogisticRegression()
        classifier.fit(text_matrix[0:len(self.trainingSet)], self.trainingLabel)
        pred_labels = classifier.predict(text_matrix[len(self.trainingSet):])
        print('Logistic:')
        print(classification_report(self.testLabel, pred_labels))

        classifier = SVC()
        classifier.fit(text_matrix[0:len(self.trainingSet)], self.trainingLabel)
        pred_labels = classifier.predict(text_matrix[len(self.trainingSet):])
        print('SVM:')
        print(classification_report(self.testLabel, pred_labels))

DegreeSAD.py (project: OpinionSpam, author: Coder-Yu)
def fitAndPredict(self):
        # classifier = LogisticRegression()
        # classifier.fit(self.trainingSet, self.trainingLabel)
        # pred_labels = classifier.predict(self.testSet)
        # print 'Logistic:'
        # print classification_report(self.testLabel, pred_labels)
        pred_labels = {}
        classifier = SVC()
        classifier.fit(self.trainingSet, self.trainingLabel)

        for user in self.testDict:
            pred_labels[user] = classifier.predict([[self.MUD[user], self.RUD[user], self.QUD[user]]])
        # print 'SVM:'
        # print classification_report(self.testLabel, pred_labels)
        return pred_labels

        # classifier = DecisionTreeClassifier(criterion='entropy')
        # classifier.fit(self.trainingSet, self.trainingLabel)
        # pred_labels = classifier.predict(self.testSet)
        # print 'Decision Tree:'
        # print classification_report(self.testLabel, pred_labels)
        # return self.trainingSet, self.trainingLabel, self.testSet, self.testLabel

metrics.py (project: WEARING, author: nlkim0817)
def cv_reg_lr(trX, trY, vaX, vaY, Cs=[0.01, 0.05, 0.1, 0.5, 1., 5., 10., 50., 100.]):
    tr_accs = []
    va_accs = []
    models = []
    for C in Cs:
        model = LR(C=C)
        model.fit(trX, trY)
        tr_pred = model.predict(trX)
        va_pred = model.predict(vaX)
        tr_acc = metrics.accuracy_score(trY, tr_pred)
        va_acc = metrics.accuracy_score(vaY, va_pred)
        print('%.4f %.4f %.4f' % (C, tr_acc, va_acc))
        tr_accs.append(tr_acc)
        va_accs.append(va_acc)
        models.append(model)
    best = np.argmax(va_accs)
    print('best model C: %.4f tr_acc: %.4f va_acc: %.4f' % (Cs[best], tr_accs[best], va_accs[best]))
    return models[best]
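For illustration, the sweep above could be driven with a synthetic dataset such as the one below; the data and split are assumptions for the sketch, not part of the original project:

from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split

# Synthetic data purely for illustration
X, y = make_classification(n_samples=200, n_features=10, random_state=0)
trX, vaX, trY, vaY = train_test_split(X, y, test_size=0.25, random_state=0)
best_lr = cv_reg_lr(trX, trY, vaX, vaY)  # returns the model with the best validation accuracy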

top_factors.py (project: healthcareai-py, author: HealthCatalyst)
def prepare_fit_model_for_factors(model_type, x_train, y_train):
    """
    Given a model type, train and test data

    Args:
        model_type (str): 'classification' or 'regression'
        x_train:
        y_train:

    Returns:
        (sklearn.base.BaseEstimator): A fit model.
    """

    if model_type == 'classification':
        algorithm = LogisticRegression()
    elif model_type == 'regression':
        algorithm = LinearRegression()
    else:
        algorithm = None

    if algorithm is not None:
        algorithm.fit(x_train, y_train)

    return algorithm
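A hypothetical call, assuming x_train is a feature matrix and y_train a binary label vector:

model = prepare_fit_model_for_factors('classification', x_train, y_train)
print(type(model).__name__)  # LogisticRegression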

models.py (project: AutoML5, author: djajetic)
def __init__(self, info, verbose=True, debug_mode=False):
        self.label_num=info['label_num']
        self.target_num=info['target_num']
        self.task = info['task']
        self.metric = info['metric']
        self.postprocessor = None
        #self.postprocessor = MultiLabelEnsemble(LogisticRegression(), balance=True) # To calibrate proba
        self.postprocessor = MultiLabelEnsemble(LogisticRegression(), balance=False) # To calibrate proba
        if debug_mode>=2:
            self.name = "RandomPredictor"
            self.model = RandomPredictor(self.target_num)
            self.predict_method = self.model.predict_proba 
            return
        if info['task']=='regression':
            if info['is_sparse']==True:
                self.name = "BaggingRidgeRegressor"
                self.model = BaggingRegressor(base_estimator=Ridge(), n_estimators=1, verbose=verbose) # unfortunately, no warm start...
            else:
                self.name = "GradientBoostingRegressor"
                self.model = GradientBoostingRegressor(n_estimators=1,  max_depth=4, min_samples_split=14, verbose=verbose, warm_start = True)
            self.predict_method = self.model.predict # Always predict probabilities
        else:
            if info['has_categorical']: # Out of laziness, we do not convert categorical variables...
                self.name = "RandomForestClassifier"
                self.model = RandomForestClassifier(n_estimators=1, verbose=verbose) # unfortunately, no warm start...
            elif info['is_sparse']:                
                self.name = "BaggingNBClassifier"
                self.model = BaggingClassifier(base_estimator=BernoulliNB(), n_estimators=1, verbose=verbose) # unfortunately, no warm start...                          
            else:
                self.name = "GradientBoostingClassifier"
                self.model = eval(self.name + "(n_estimators=1, verbose=" + str(verbose) + ", random_state=1, warm_start = True)")
            if info['task']=='multilabel.classification':
                self.model = MultiLabelEnsemble(self.model)
            self.predict_method = self.model.predict_proba

classifier_utils.py (project: human-rl, author: gsastry)
def run_predict_logistic_regression(X_train,Y_train,X_test,Y_test):
    clf = LogisticRegression()
    clf = clf.fit(X_train, Y_train)
    pred = clf.predict(X_test)
    print('Logistic 0-1 error. \n Training: ', zero_one_score(Y_train, clf.predict(X_train)),
          '\n Test:', zero_one_score(Y_test, pred))

    return clf

test_ner.py (project: conec, author: cod3licious)
def train_clf(self, trainfiles):
        # tokens: list of words, labels: list of corresponding labels
        # go document by document because of local context
        final_labels = []
        featmat = []
        for trainfile in trainfiles:
            for tokens, labels in yield_tokens_labels(trainfile):
                final_labels.extend(labels)
                featmat.append(self.make_featmat_rep(tokens))
        featmat = np.vstack(featmat)
        print("training classifier")
        clf = logreg(class_weight='balanced', random_state=1)
        clf.fit(featmat, final_labels)
        self.clf = clf

identify_singing_voice_gender.py (project: ISM2017, author: ybayle)
def classify(train=None, test=None, data=None, res_dir="res/", disp=True, outfilename=None):
    """Description of compare
    compare multiple classifier and display the best one
    """
    utils.print_success("Comparison of differents classifiers")
    if data is not None:
        train_features = data["train_features"]
        train_groundtruths = data["train_groundtruths"]
        test_features = data["test_features"]
        test_groundtruths = data["test_groundtruths"]
    else:
        train = utils.abs_path_file(train)
        test = utils.abs_path_file(test)
        train_features, train_groundtruths = read_file(train)
        test_features, test_groundtruths = read_file(test)
    if not utils.create_dir(res_dir):
        res_dir = utils.abs_path_dir(res_dir)
    classifiers = {
        "RandomForest": RandomForestClassifier(n_jobs=-1)
        # "RandomForest": RandomForestClassifier(n_estimators=5),
        # "KNeighbors":KNeighborsClassifier(3),
        # "GaussianProcess":GaussianProcessClassifier(1.0 * RBF(1.0), warm_start=True),
        # "DecisionTree":DecisionTreeClassifier(max_depth=5),
        # "MLP":MLPClassifier(),
        # "AdaBoost":AdaBoostClassifier(),
        # "GaussianNB":GaussianNB(),
        # "QDA":QuadraticDiscriminantAnalysis(),
        # "SVM":SVC(kernel="linear", C=0.025),
        # "GradientBoosting":GradientBoostingClassifier(),
        # "ExtraTrees":ExtraTreesClassifier(),
        # "LogisticRegression":LogisticRegression(),
        # "LinearDiscriminantAnalysis":LinearDiscriminantAnalysis()
    }
    for key in classifiers:
        utils.print_success(key)
        clf = classifiers[key]
        utils.print_info("\tFit")
        clf.fit(train_features, train_groundtruths)
        utils.print_info("\tPredict")
        predictions = clf.predict(test_features)
    return predictions

LR.py (project: PersonalizedMultitaskLearning, author: mitmedialab)
def reloadHelper():
    reload(helper)  # Python 3: requires `from importlib import reload`

# http://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LogisticRegression.html

