python类SGDClassifier()的实例源码

test_mnb_modelmanager.py 文件源码 项目:UrbanSearch 作者: urbansearchTUD 项目源码 文件源码 阅读 20 收藏 0 点赞 0 评论 0
def test_init():
    """Check the type of a freshly built MNBModelManager and of its classifier pipeline."""
    manager = mnb_modelmanager.MNBModelManager('sgdcmodel.pickle')
    assert isinstance(manager, modelmanager.ModelManager)

    pipeline = manager.clf
    assert isinstance(pipeline, Pipeline)

    # The step registered under 'clf' is expected to be the SGD estimator.
    final_step = pipeline.named_steps['clf']
    assert isinstance(final_step, SGDClassifier)
test_classifytext.py 文件源码 项目:UrbanSearch 作者: urbansearchTUD 项目源码 文件源码 阅读 21 收藏 0 点赞 0 评论 0
def test_init():
    """A default ClassifyText must hold an SGDC model manager wrapping an SGD pipeline."""
    classifier = classifytext.ClassifyText()
    manager = classifier.mm
    assert isinstance(manager, sgdc_modelmanager.SGDCModelManager)

    pipeline = classifier.mm.clf
    assert isinstance(pipeline, Pipeline)

    # The step registered under 'clf' is expected to be the SGD estimator.
    assert isinstance(classifier.mm.clf.named_steps['clf'], SGDClassifier)
test_classifytext.py 文件源码 项目:UrbanSearch 作者: urbansearchTUD 项目源码 文件源码 阅读 28 收藏 0 点赞 0 评论 0
def test_init_sgdc():
    """Requesting the SGDC type explicitly must yield an SGDC model manager and SGD pipeline."""
    classifier = classifytext.ClassifyText(type=classifytext.SGDC)
    manager = classifier.mm
    assert isinstance(manager, sgdc_modelmanager.SGDCModelManager)

    pipeline = classifier.mm.clf
    assert isinstance(pipeline, Pipeline)

    # The step registered under 'clf' is expected to be the SGD estimator.
    assert isinstance(classifier.mm.clf.named_steps['clf'], SGDClassifier)
trainer.py 文件源码 项目:fastxml 作者: Refefer 项目源码 文件源码 阅读 28 收藏 0 点赞 0 评论 0
def train_clf(self, X, idxss, rs):
        """Fit a linear classifier on the data selected by ``idxss``.

        The estimator is chosen from ``self.engine`` and ``self.loss``:
        liblinear-backed LogisticRegression / LinearSVC, or SGDClassifier.

        :param X: feature data handed to ``self.build_XY``
        :param idxss: collection of index groups; their total length sets the epoch count
        :param rs: value passed as ``random_state`` to the estimator and to ``build_XY``
        :return: tuple ``(fitted estimator, CLF(coef_, intercept_))``
        """
        total_examples = sum(map(len, idxss))
        n_epochs = self.compute_epochs(total_examples)

        # 'fastxml' optimisation uses an L1 penalty, everything else L2.
        penalty = 'l1' if self.optimization == 'fastxml' else 'l2'

        X_train, y_train = self.build_XY(X, idxss, rs)

        # In 'auto' mode liblinear is picked once the training set exceeds
        # auto_weight * max_leaf_size rows.
        above_auto_threshold = X_train.shape[0] > (self.auto_weight * self.max_leaf_size)
        use_liblinear = (self.engine == 'liblinear'
                         or (self.engine == 'auto' and above_auto_threshold))

        if not use_liblinear:
            clf = SGDClassifier(loss=self.loss, penalty=penalty, n_iter=n_epochs,
                                alpha=self.alpha, fit_intercept=self.bias,
                                class_weight='balanced', random_state=rs)
        elif self.loss == 'log':
            clf = LogisticRegression(solver='liblinear', random_state=rs, tol=1,
                                     C=self.C, penalty=penalty)
        else:
            clf = LinearSVC(C=self.C, fit_intercept=self.bias,
                            max_iter=n_epochs, class_weight='balanced',
                            penalty=penalty, random_state=rs)

        clf.fit(X_train, y_train)

        # Shrink the fitted model: sparsify the weights and, when a bias
        # term is used, store the intercept as float32.
        clf.coef_ = sparsify(clf.coef_, self.eps)
        if self.bias:
            clf.intercept_ = clf.intercept_.astype('float32')

        compact = CLF(clf.coef_, clf.intercept_)
        return clf, compact
BR.py 文件源码 项目:molearn 作者: jmread 项目源码 文件源码 阅读 33 收藏 0 点赞 0 评论 0
def demo():
    """Fit a BR model on the XOR toy dataset and print its predictions next to the targets."""
    import sys
    # Make ../core importable so that `tools` can be found.
    sys.path.append( '../core' )
    from tools import make_XOR_dataset

    X, Y = make_XOR_dataset()
    _, n_labels = Y.shape

    base_classifier = linear_model.SGDClassifier(n_iter=100)
    model = BR(n_labels, base_classifier)
    model.fit(X, Y)

    # Predict on the training inputs and show them against the true labels.
    predictions = model.predict(X)
    print(predictions)
    print("vs")
    print(Y)
ELM.py 文件源码 项目:molearn 作者: jmread 项目源码 文件源码 阅读 22 收藏 0 点赞 0 评论 0
def demo():
    """Run three ELM demonstrations on the XOR toy dataset and print the results.

    The runs are: classification (ELM over BR/SGDClassifier), regression
    (ELM over LinearRegression), and regression with ELM_OI over
    BR/SGDRegressor.
    """
    import sys
    # Make ../core importable so that `tools` can be found.
    sys.path.append( '../core' )
    from tools import make_XOR_dataset
    from BR import BR
    set_printoptions(precision=3, suppress=True)

    X, Y = make_XOR_dataset()
    n_rows, n_labels = Y.shape

    print("CLASSIFICATION")
    base_classifier = linear_model.SGDClassifier(n_iter=100)
    classifier_head = BR(-1, base_classifier)
    network = ELM(8, f=tanh, h=classifier_head)
    network.fit(X, Y)
    # Predictions on the training inputs, followed by the true labels.
    print(network.predict(X))
    print("vs")
    print(Y)

    print("REGRESSION")
    regressor = ELM(100, h=linear_model.LinearRegression())
    regressor.fit(X, Y)
    print(Y)
    print(regressor.predict(X))

    print("REGRESSION OI")
    regressor_head = BR(-1, h=linear_model.SGDRegressor())
    regressor = ELM_OI(100, h=regressor_head)
    regressor.fit(X, Y)
    print(Y)
    print(regressor.predict(X))
classifier_chains.py 文件源码 项目:molearn 作者: jmread 项目源码 文件源码 阅读 21 收藏 0 点赞 0 评论 0
def demo():
    """Fit RCC, MCC and PCC classifier chains on the XOR toy dataset and print predictions.

    Every chain gets its own fresh log-loss SGDClassifier as base estimator.
    The true labels are printed first so the predictions can be compared by eye.
    """
    from molearn.core.tools import make_XOR_dataset

    # Value given to MCC as M at construction time; reused in the message
    # printed for the 'default' prediction run so the two cannot drift apart.
    mcc_default_m = 1000

    def new_base():
        # A separate estimator instance per chain, as in the original demo.
        return SGDClassifier(n_iter=100, loss='log')

    X, Y = make_XOR_dataset()

    print(Y)
    print("vs")

    print("RCC")
    cc = RCC(new_base())
    cc.fit(X, Y)
    print(cc.predict(X))

    print("MCC")
    mcc = MCC(new_base(), M=mcc_default_m)
    mcc.fit(X, Y)
    Yp = mcc.predict(X, M=50)
    print("with 50 iterations ...")
    print(Yp)
    Yp = mcc.predict(X, 'default')
    print("with default (%d) iterations ..." % mcc_default_m)
    print(Yp)

    print("PCC")
    pcc = PCC(new_base())
    pcc.fit(X, Y)
    print(pcc.predict(X))
classifier_svm.py 文件源码 项目:text-classification 作者: cahya-wirawan 项目源码 文件源码 阅读 31 收藏 0 点赞 0 评论 0
def fit(self, dataset, filename):
        """Build the text pipeline, fit it on ``dataset`` and dump it to ``filename + ".pkl"``.

        The pipeline is CountVectorizer -> TfidfTransformer -> SGDClassifier
        with hinge loss. The fitted pipeline is kept on ``self.clf``.
        """
        self.logger.debug("fit")

        steps = [
            ('vect', CountVectorizer()),
            ('tfidf', TfidfTransformer()),
            ('clf', SGDClassifier(loss='hinge', penalty='l2', alpha=1e-3, n_iter=5, random_state=42)),
        ]
        self.clf = Pipeline(steps)

        documents = dataset.get_dataset()['data']
        labels = dataset.get_dataset()['target']
        self.clf.fit(documents, labels)

        # Persist the fitted pipeline with maximum joblib compression.
        target_path = filename + ".pkl"
        joblib.dump(self.clf, target_path, compress=9)
svm_utils.py 文件源码 项目:ml_defense 作者: arjunbhagoji 项目源码 文件源码 阅读 29 收藏 0 点赞 0 评论 0
def model_trainer(model_dict, X_train, y_train, adv=None, rd=None, rev=None):
    """Train an SVM described by ``model_dict``, save it to file and return it.

    :param model_dict: settings; reads the keys 'svm_type' ('linear' or an
        SVC kernel name), 'penconst' (used as C) and 'penalty'
    :param X_train: training inputs passed to ``fit``
    :param y_train: training targets passed to ``fit``
    :param adv: kept for interface compatibility; not used by this function
    :param rd: forwarded to ``get_svm_model_name``
    :param rev: forwarded to ``get_svm_model_name``
    :return: the fitted classifier
    """

    print('Training model...')
    start_time = time.time()
    abs_path_m = resolve_path_m(model_dict)
    svm_model = model_dict['svm_type']
    C = model_dict['penconst']
    penalty = model_dict['penalty']

    # Create model based on parameters
    if svm_model == 'linear':
        # dual is switched off only for the 'l1' penalty.
        dual = penalty != 'l1'
        clf = svm.LinearSVC(C=C, penalty=penalty, dual=dual)
    else:
        # Any other value is used as the kernel name of a kernel SVM.
        clf = svm.SVC(C=C, kernel=svm_model)

    # Train model
    clf.fit(X_train, y_train)
    print('Finish training in {:d}s'.format(int(time.time() - start_time)))

    # Save model
    joblib.dump(clf, abs_path_m +
                get_svm_model_name(model_dict, rd, rev) + '.pkl')
    return clf
#------------------------------------------------------------------------------#
sklearn_estimators.py 文件源码 项目:gcForest 作者: kingfengji 项目源码 文件源码 阅读 21 收藏 0 点赞 0 评论 0
def __init__(self, name, kwargs):
        """Initialise the wrapper by passing the SGDClassifier class to the parent constructor.

        :param name: forwarded unchanged to the parent constructor
        :param kwargs: forwarded unchanged to the parent constructor
        """
        # The estimator class is imported at call time, not at module import.
        from sklearn.linear_model import SGDClassifier as estimator_class
        parent = super(GCSGDClassifier, self)
        parent.__init__(name, estimator_class, kwargs)
model_error.py 文件源码 项目:feedlark 作者: CPSSD 项目源码 文件源码 阅读 19 收藏 0 点赞 0 评论 0
def get_model_score(training, validation):
    """Fit a log-loss SGDClassifier on ``training`` and return its score on ``validation``."""
    classifier = linear_model.SGDClassifier(loss='log', n_iter=5)

    train_inputs = get_input_data(training)
    train_targets = get_output_data(training)
    classifier.fit(train_inputs, train_targets)

    validation_inputs = get_input_data(validation)
    validation_targets = get_output_data(validation)
    return classifier.score(validation_inputs, validation_targets)
predict.py 文件源码 项目:feedlark 作者: CPSSD 项目源码 文件源码 阅读 27 收藏 0 点赞 0 评论 0
def __init__(self):
        """Create the underlying model and store it on ``self.model``."""
        # With the "log" loss the SGD classifier performs logistic regression.
        sgd_settings = {"loss": "log", "n_iter": 5}
        self.model = linear_model.SGDClassifier(**sgd_settings)
imdb_success_predictor.py 文件源码 项目:Movie-Success-Predictor 作者: Blueteak 项目源码 文件源码 阅读 16 收藏 0 点赞 0 评论 0
def _evaluate_all_classifiers(X, Y, label):
    """Run ``test_classifier`` on (X, Y) once for each of the three model families."""
    builders = (
        lambda: linear_model.SGDClassifier(loss='log'),
        GaussianNB,
        lambda: RandomForestClassifier(n_estimators=10, max_depth=10),
    )
    # Each estimator is built right before it is evaluated, in the order
    # SGD (log loss), Gaussian naive Bayes, random forest.
    for build in builders:
        test_classifier(build(), X, Y, label)


def main():
    """Evaluate the classifiers on the before-release and after-release movie data.

    Both phases use the same inputs builder but different loaders and output
    builders. ``print`` is called with a single parenthesised argument so the
    function parses and prints identically on Python 2 and Python 3.
    """
    # Before release
    movie_info_before_release = load_movie_info_before_release()
    print('***Before release***')

    X = create_input(movie_info_before_release)
    Y = create_output_before_release(movie_info_before_release)
    _evaluate_all_classifiers(X, Y, 'before_release')

    # After release
    movie_info = load_movie_info()
    print('***After release***')

    X = create_input(movie_info)
    Y = create_output(movie_info)
    _evaluate_all_classifiers(X, Y, 'after_release')
citation_eval.py 文件源码 项目:GraphSAGE 作者: williamleif 项目源码 文件源码 阅读 23 收藏 0 点赞 0 评论 0
def run_regression(train_embeds, train_labels, test_embeds, test_labels):
    """Fit a log-loss SGDClassifier and a DummyClassifier, then print both micro-F1 test scores.

    NumPy's global random generator is seeded with 1 before anything is fitted.
    """
    np.random.seed(1)
    from sklearn.linear_model import SGDClassifier
    from sklearn.dummy import DummyClassifier
    from sklearn.metrics import f1_score

    baseline = DummyClassifier()
    baseline.fit(train_embeds, train_labels)

    classifier = SGDClassifier(loss="log", n_jobs=10)
    classifier.fit(train_embeds, train_labels)

    # Score the trained model first, then the baseline, on the test split.
    model_f1 = f1_score(test_labels, classifier.predict(test_embeds), average="micro")
    print("F1 score:", model_f1)
    baseline_f1 = f1_score(test_labels, baseline.predict(test_embeds), average="micro")
    print("Random baseline f1 score:", baseline_f1)
reddit_eval.py 文件源码 项目:GraphSAGE 作者: williamleif 项目源码 文件源码 阅读 20 收藏 0 点赞 0 评论 0
def run_regression(train_embeds, train_labels, test_embeds, test_labels):
    """Fit a log-loss SGDClassifier and a DummyClassifier, then print micro-F1 scores.

    Printed in order: model score on the test split, model score on the train
    split, baseline score on the test split. NumPy's global random generator
    is seeded with 1 before anything is fitted.
    """
    np.random.seed(1)
    from sklearn.linear_model import SGDClassifier
    from sklearn.dummy import DummyClassifier
    from sklearn.metrics import f1_score

    baseline = DummyClassifier()
    baseline.fit(train_embeds, train_labels)

    classifier = SGDClassifier(loss="log", n_jobs=55)
    classifier.fit(train_embeds, train_labels)

    print("Test scores")
    test_f1 = f1_score(test_labels, classifier.predict(test_embeds), average="micro")
    print(test_f1)

    print("Train scores")
    train_f1 = f1_score(train_labels, classifier.predict(train_embeds), average="micro")
    print(train_f1)

    print("Random baseline")
    baseline_f1 = f1_score(test_labels, baseline.predict(test_embeds), average="micro")
    print(baseline_f1)
ml_framework.py 文件源码 项目:FLASH 作者: yuyuz 项目源码 文件源码 阅读 24 收藏 0 点赞 0 评论 0
def get_data_preprocessor_balancing(params, y):
    """Fill in the class-balancing entries of ``params`` and return it.

    ``params['balancing']`` selects the mode, either by name ('None' /
    'weighting') or by the stringified code stored under that name in
    ``params['layer_dict_list'][1]``. Any other value leaves ``params``
    untouched.

    :param params: settings dict, modified in place
    :param y: label array, 1-D or 2-D (one column per label)
    :return: the same ``params`` dict
    """
    balancing_codes = params['layer_dict_list'][1]
    mode = params['balancing']

    if mode == str(balancing_codes['None']) or mode == 'None':
        # Consumed by estimators taking class_weight
        # (tree ensembles, LinearSVC, SVC, SGDClassifier, ...).
        params['class_weight'] = None
        # Consumed by estimators taking sample_weight
        # (AdaBoostClassifier, GradientBoostingClassifier).
        params['sample_weight'] = None
        return params

    if mode == str(balancing_codes['weighting']) or mode == 'weighting':
        params['class_weight'] = 'auto'

        if len(y.shape) > 1:
            # Collapse each label row into one integer code by giving
            # column i the weight 2**i.
            offsets = [1 << column for column in range(y.shape[1])]
            y_ = np.sum(y * offsets, axis=1)
        else:
            y_ = y

        labels, counts = np.unique(y_, return_counts=True)
        # Inverse class frequency, rescaled so the weights average to 1.
        inverse_frequency = 1. / counts
        inverse_frequency = inverse_frequency / np.mean(inverse_frequency)

        sample_weight = np.ones(y_.shape)
        for label, weight in zip(labels, inverse_frequency):
            sample_weight[y_ == label] *= weight
        params['sample_weight'] = sample_weight

    return params
transact.py 文件源码 项目:banking-class 作者: eli-goodfriend 项目源码 文件源码 阅读 21 收藏 0 点赞 0 评论 0
def run_cat(filename,modelname,fileout,embeddings,new_run=True,run_parse=True,
            model_type='logreg',C=10.0,
            alpha=1.0, cutoff=0.50, n_iter=1):
    """Read a CSV, run parsing and classification on it, and save the results and the model.

    :param filename: path of the input CSV
    :param modelname: path of the pickled model; read when ``new_run`` is
        False and always (over)written at the end
    :param fileout: path the processed data frame is written to as CSV
    :param embeddings: forwarded to ``cat_df``
    :param new_run: True to start from a fresh model, False to load ``modelname``
    :param run_parse: forwarded to ``cat_df``
    :param model_type: 'logreg', 'passive-aggressive' or 'naive-bayes'
    :param C: C for the passive-aggressive model
    :param alpha: alpha for the SGD ('logreg') model
    :param cutoff: forwarded to ``cat_df``
    :param n_iter: n_iter for the SGD ('logreg') model
    :raises NameError: if ``new_run`` is True and ``model_type`` is not recognised
    """
    # pull relevant data and run parsing and classification
    df = pd.read_csv(filename)
    if len(df.columns) == 2:  # make sure columns have the right names
        df.columns = ['raw', 'amount']

    if new_run:  # initialize the model
        if model_type == 'logreg':
            model = linear_model.SGDClassifier(loss='log', warm_start=True,
                                               n_iter=n_iter, alpha=alpha)
        elif model_type == 'passive-aggressive':
            model = linear_model.PassiveAggressiveClassifier(C=C, warm_start=True)
        elif model_type == 'naive-bayes':
            model = naive_bayes.GaussianNB()
        else:
            raise NameError('model_type must be logreg, passive-aggressive, or naive-bayes')
    else:  # load a saved, pre-trained model
        # NOTE(security): unpickling executes arbitrary code from the file;
        # only load model files that come from a trusted source.
        with open(modelname, 'rb') as model_file:
            model = pickle.load(model_file)

    fileCities = dirs.data_dir + 'cities_by_state.pickle'
    us_cities = pd.read_pickle(fileCities)

    df = cat_df(df, model, us_cities, embeddings, new_run, run_parse, cutoff=cutoff,
                model_type=model_type)

    df.to_csv(fileout, index=False)

    # Persist the model (whatever its type) for later runs; the context
    # manager closes the handle even if pickling fails.
    with open(modelname, 'wb') as model_file:
        pickle.dump(model, model_file)


# ------ testing functions
classification.py 文件源码 项目:nba-games 作者: ixarchakos 项目源码 文件源码 阅读 25 收藏 0 点赞 0 评论 0
def model_fitting(train_set, train_labels, classifier_name, n_jobs=cpu_count()):
    """
    The fitting process with sklearn algorithms.

    Only the requested estimator is constructed: the table below maps each
    name to a zero-argument factory, so selecting one model does not
    instantiate (or depend on the constructors of) the other eleven.

    :param train_set: numpy array, required
    :param train_labels: list, required
    :param classifier_name: string, required; one of the keys of the table below
    :param n_jobs: integer, optional; defaults to the CPU count read when the
        function is defined
    :return: object
        - Fit classifier model according to the given training data
    :raises KeyError: if ``classifier_name`` is not a known key
    """
    classifier_factories = {
        "svm_linear": lambda: SVC(probability=True, kernel='linear', C=1.0),
        "svm_poly": lambda: SVC(probability=True, kernel='poly', C=1.0),
        "svm_rbf": lambda: SVC(probability=True, kernel='rbf', C=1.0, gamma=0.01),
        "linear_svc": lambda: LinearSVC(
            penalty='l2', loss='squared_hinge', dual=True, tol=0.1, C=1.0, multi_class='ovr',
            fit_intercept=True, intercept_scaling=1, random_state=None, max_iter=3000),
        "knn": lambda: KNeighborsClassifier(
            n_neighbors=100, weights='distance', leaf_size=30, n_jobs=n_jobs),
        "random_forests": lambda: RandomForestClassifier(
            n_estimators=350, criterion='entropy', min_samples_split=2,
            min_samples_leaf=1, max_leaf_nodes=600, n_jobs=n_jobs),
        "logistic_regression": lambda: LogisticRegression(
            penalty='l2', dual=False, tol=0.0001, C=2.4, fit_intercept=True, intercept_scaling=1,
            random_state=None, solver='liblinear', max_iter=1000, multi_class='ovr',
            warm_start=False, n_jobs=n_jobs),
        "decision_trees": lambda: DecisionTreeClassifier(
            criterion='gini', splitter='best', max_depth=None, min_samples_split=2,
            min_samples_leaf=100, min_weight_fraction_leaf=0.0, max_features=None,
            random_state=None, max_leaf_nodes=None, presort=False),
        "sgd": lambda: SGDClassifier(alpha=.0001, n_iter=500, penalty="elasticnet", n_jobs=n_jobs),
        "neural_network": lambda: Classifier(
            layers=[Layer("Sigmoid", units=14), Layer("Sigmoid", units=13), Layer("Sigmoid", units=12),
                    Layer("Sigmoid", units=10), Layer("Softmax")], learning_rate=0.01, n_iter=200,
            batch_size=10, regularize='L1', n_stable=50, dropout_rate=0, verbose=True),
        "GBC": lambda: GradientBoostingClassifier(
            max_depth=10, max_leaf_nodes=850, min_samples_leaf=15, learning_rate=0.1),
        "XGB": lambda: XGBClassifier(
            base_score=0.5, colsample_bylevel=1, colsample_bytree=1, gamma=0, learning_rate=0.1,
            max_delta_step=0, max_depth=10, min_child_weight=2, missing=None, n_estimators=100,
            nthread=n_jobs, reg_alpha=0, objective='binary:logistic', reg_lambda=1,
            scale_pos_weight=1, seed=0, silent=True, subsample=1),
    }
    build_classifier = classifier_factories[classifier_name]
    return build_classifier().fit(train_set, train_labels)
svm.py 文件源码 项目:opentc 作者: cahya-wirawan 项目源码 文件源码 阅读 25 收藏 0 点赞 0 评论 0
def fit(self, dataset, filename):
        """Train the text-classification pipeline and store it as ``filename + ".pkl"``.

        Stages: CountVectorizer, TfidfTransformer, then an SGDClassifier
        using log loss. The fitted pipeline is also kept on ``self.clf``.
        """
        self.logger.debug("fit")

        vectorizer = ('vect', CountVectorizer())
        tfidf = ('tfidf', TfidfTransformer())
        classifier = ('clf', SGDClassifier(loss='log', penalty='l2', alpha=1e-3, n_iter=5, random_state=42))
        self.clf = Pipeline([vectorizer, tfidf, classifier])

        texts = dataset.get_dataset()['data']
        targets = dataset.get_dataset()['target']
        self.clf.fit(texts, targets)

        # Write the fitted pipeline to disk with joblib compression level 9.
        joblib.dump(self.clf, filename + ".pkl", compress=9)
classifier.py 文件源码 项目:quantulum 作者: marcolagi 项目源码 文件源码 阅读 24 收藏 0 点赞 0 评论 0
def train_classifier(download=True, parameters=None, ngram_range=(1, 1)):
    """Train the classifier and pickle it together with its TF-IDF model.

    Examples are read from ``train.json`` and ``wiki.json`` under
    ``l.TOPDIR``; each example's 'text' is the input and its 'unit' the class.
    The result is written to ``clf.pickle`` in the same directory as a dict
    with the keys 'tfidf_model', 'clf' and 'target_names'.

    :param download: when true, call ``download_wiki()`` first
    :param parameters: keyword arguments for SGDClassifier; a default set is
        used when None
    :param ngram_range: n-gram range for the TfidfVectorizer
    """
    if download:
        download_wiki()

    with open(os.path.join(l.TOPDIR, 'train.json')) as handle:
        training_set = json.load(handle)
    with open(os.path.join(l.TOPDIR, 'wiki.json')) as handle:
        wiki_set = json.load(handle)

    examples = training_set + wiki_set
    target_names = list(set(example['unit'] for example in examples))
    # Name -> position lookup, so labelling every example is one dict access
    # instead of a linear list.index() scan.
    target_index = {name: position for position, name in enumerate(target_names)}

    train_data = [clean_text(example['text']) for example in examples]
    train_target = [target_index[example['unit']] for example in examples]

    tfidf_model = TfidfVectorizer(sublinear_tf=True,
                                  ngram_range=ngram_range,
                                  stop_words='english')

    matrix = tfidf_model.fit_transform(train_data)

    if parameters is None:
        parameters = {'loss': 'log', 'penalty': 'l2', 'n_iter': 50,
                      'alpha': 0.00001, 'fit_intercept': True}

    clf = SGDClassifier(**parameters).fit(matrix, train_target)
    obj = {'tfidf_model': tfidf_model,
           'clf': clf,
           'target_names': target_names}

    # Pickle data is bytes: the file must be opened in binary mode (a
    # text-mode handle raises TypeError on Python 3).
    with open(os.path.join(l.TOPDIR, 'clf.pickle'), 'wb') as handle:
        pickle.dump(obj, handle)


###############################################################################


问题


面经


文章

微信
公众号

扫码关注公众号