python类SGDClassifier()的实例源码

models.py 文件源码 项目:johnson-county-ddj-public 作者: dssg 项目源码 文件源码 阅读 25 收藏 0 点赞 0 评论 0
def get_feature_importance(self, clf, model_name):
    """Return the fitted model's per-feature weights as a plain list.

    Args:
        clf: Fitted estimator exposing ``feature_importances_`` or ``coef_``,
            whichever ``model_name`` calls for.
        model_name: Name of the estimator type, e.g. ``'SGDClassifier'``.

    Returns:
        ``list(clf.feature_importances_)`` for the tree/ensemble models,
        the nested-list form of ``clf.coef_`` for the linear models, and
        ``None`` for models listed without an importance attribute.

    Raises:
        KeyError: If ``model_name`` is not one of the known names.
    """
    tree_based = ('RandomForestClassifier', 'ExtraTreesClassifier',
                  'AdaBoostClassifier', 'GradientBoostingClassifier',
                  'DecisionTreeClassifier')
    linear = ('LogisticRegression', 'svm.SVC', 'SGDClassifier', 'linear.SVC')
    without_importance = ('GaussianNB', 'KNeighborsClassifier')

    source_of = {}
    source_of.update(dict.fromkeys(tree_based, 'feature_importances'))
    source_of.update(dict.fromkeys(linear, 'coef'))
    source_of.update(dict.fromkeys(without_importance, None))

    # Plain indexing on purpose: an unknown model name must raise KeyError.
    source = source_of[model_name]
    if source == 'feature_importances':
        return list(clf.feature_importances_)
    if source == 'coef':
        return list(clf.coef_.tolist())
    return None
recognition_utils.py 文件源码 项目:pybot 作者: spillai 项目源码 文件源码 阅读 31 收藏 0 点赞 0 评论 0
def __init__(self, filename, target_map, classifier='svm'):
    """Initialise bookkeeping state and the classifier with its search grid.

    Args:
        filename: Stored unchanged on ``self.filename_``.
        target_map: Mapping whose keys are the target ids; the keys must be
            convertible to ``int32``.
        classifier: ``'svm'`` (LinearSVC) or ``'sgd'`` (SGDClassifier).

    Raises:
        Exception: If ``classifier`` is neither ``'svm'`` nor ``'sgd'``.
    """
    self.seed_ = 0
    self.filename_ = filename
    self.target_map_ = target_map
    # list(...) is needed on Python 3, where dict.keys() is a view that numpy
    # would wrap as one opaque object instead of an array of ids.
    self.target_ids_ = np.unique(list(target_map.keys())).astype(np.int32)
    self.epoch_no_ = 0
    self.st_time_ = time.time()

    # Setup classifier
    print('-------------------------------')
    print('====> Building Classifier, setting class weights')
    if classifier == 'svm':
        self.clf_hyparams_ = {'C': [0.01, 0.1, 1.0, 10.0, 100.0],
                              'class_weight': ['balanced']}
        self.clf_base_ = LinearSVC(random_state=self.seed_)
    elif classifier == 'sgd':
        # 'balanced' matches the svm branch; the older 'auto' spelling is
        # deprecated in scikit-learn. A 'loss': ['hinge'] entry was
        # deliberately left out of the grid by the original author.
        self.clf_hyparams_ = {'alpha': [0.0001, 0.001, 0.01, 0.1, 1.0, 10.0],
                              'class_weight': ['balanced']}
        # NOTE(review): this branch sets clf_ while the svm branch sets
        # clf_base_ -- confirm against the code that consumes them.
        self.clf_ = SGDClassifier(loss='log', penalty='l2', shuffle=False,
                                  random_state=self.seed_, warm_start=True,
                                  n_jobs=-1, n_iter=1, verbose=4)
    else:
        raise Exception('Unknown classifier type %s. Choose from [sgd, svm, gradient-boosting, extra-trees]'
                        % classifier)
classify.py 文件源码 项目:oss-github-analysis-project 作者: itu-oss-project-team 项目源码 文件源码 阅读 33 收藏 0 点赞 0 评论 0
def __create_classifiers(self):
    """Build the list of classifiers to evaluate.

    Returns:
        list of dict: One entry per classifier, holding the estimator under
        ``"func"`` and a short identifier under ``"name"``.
    """
    named_estimators = [
        ("sgd", linear_model.SGDClassifier(loss="log")),
        ("knn1", neighbors.KNeighborsClassifier(1, weights='distance')),
        ("knn3", neighbors.KNeighborsClassifier(3, weights='distance')),
        ("knn5", neighbors.KNeighborsClassifier(5, weights='distance')),
        ("naive_bayes", GaussianNB()),
    ]
    # Candidates currently switched off: tree.DecisionTreeClassifier()
    # ("decision_tree"), MLPClassifier(max_iter=10000) ("mlp") and
    # RandomForestClassifier() ("random_forest").
    return [{"func": estimator, "name": label}
            for label, estimator in named_estimators]
models.py 文件源码 项目:johnson-county-ddj-public 作者: dssg 项目源码 文件源码 阅读 23 收藏 0 点赞 0 评论 0
def define_model(self, model, parameters, n_cores = 0):
    """Instantiate the estimator called ``model`` and apply ``parameters``.

    Args:
        model: Name of the estimator, e.g. ``'SGDClassifier'``.
        parameters: Keyword arguments forwarded to ``set_params``.
        n_cores: Accepted but not used by this method.

    Returns:
        The configured estimator.

    Raises:
        ConfigError: If ``model`` is not a supported name.
    """
    stump = DecisionTreeClassifier(max_depth=1)
    available = {
        'RandomForestClassifier': RandomForestClassifier(n_estimators=50, n_jobs=7),
        'ExtraTreesClassifier': ExtraTreesClassifier(
            n_estimators=10, n_jobs=7, criterion='entropy'),
        'AdaBoostClassifier': AdaBoostClassifier(
            stump, algorithm="SAMME", n_estimators=200),
        'LogisticRegression': LogisticRegression(penalty='l1', C=1e5),
        'svm.SVC': svm.SVC(kernel='linear', probability=True, random_state=0),
        'GradientBoostingClassifier': GradientBoostingClassifier(
            learning_rate=0.05, subsample=0.5, max_depth=6, n_estimators=10),
        'GaussianNB': GaussianNB(),
        'DecisionTreeClassifier': DecisionTreeClassifier(),
        'SGDClassifier': SGDClassifier(loss="hinge", penalty="l2", n_jobs=7),
        'KNeighborsClassifier': KNeighborsClassifier(n_neighbors=3),
        'linear.SVC': svm.LinearSVC(),
    }

    # Compare against None rather than truth-testing the estimator itself.
    estimator = available.get(model)
    if estimator is None:
        raise ConfigError("Unsupported model {}".format(model))

    estimator.set_params(**parameters)
    return estimator
sgd.py 文件源码 项目:hyperband 作者: zygmuntz 项目源码 文件源码 阅读 31 收藏 0 点赞 0 评论 0
def try_params( n_iterations, params ):
    """Train and evaluate one SGD configuration for ``n_iterations`` epochs.

    Args:
        n_iterations: Number of iterations; rounded to the nearest int.
        params: Keyword arguments for ``SGD`` plus a ``'scaler'`` entry that
            is either falsy (no scaling) or the name of a scaler class.

    Returns:
        Whatever ``train_and_eval_sklearn_classifier`` returns for the
        fitted classifier.
    """
    n_iterations = int(round(n_iterations))
    # Single-argument print() emits the same text on Python 2 and Python 3.
    print("n_iterations: {}".format(n_iterations))
    pprint(params)

    if params['scaler']:
        # SECURITY NOTE: eval() executes arbitrary code. params['scaler'] must
        # only ever come from the trusted search space, never external input.
        scaler = eval("{}()".format(params['scaler']))
        x_train_ = scaler.fit_transform(data['x_train'].astype(float))
        x_test_ = scaler.transform(data['x_test'].astype(float))

        local_data = {'x_train': x_train_, 'y_train': data['y_train'],
                      'x_test': x_test_, 'y_test': data['y_test']}
    else:
        local_data = data

    # we need a copy because at the next small round the best params will be re-used
    params_ = dict(params)
    params_.pop('scaler')

    clf = SGD(n_iter=n_iterations, **params_)

    return train_and_eval_sklearn_classifier(clf, local_data)
classifier.py 文件源码 项目:TrackToTrip 作者: ruipgil 项目源码 文件源码 阅读 33 收藏 0 点赞 0 评论 0
def learn(self, features, labels):
    """Fit the classifier, incrementally when it was fitted before.

    The first call performs a full ``fit`` and records the feature length.
    Later calls use ``partial_fit`` when the classifier offers it and fall
    back to a full ``fit`` otherwise. Nothing is trained when ``labels`` is
    empty.

    Args:
        features (:obj:`list` of :obj:`list` of :obj:`float`)
        labels (:obj:`list` of :obj:`str`): Labels for each set of features.
            New labels are registered before training.
    """
    flat_labels = np.ravel(labels)
    self.__learn_labels(flat_labels)
    if len(flat_labels) == 0:
        return

    encoded = self.labels.transform(flat_labels)
    previously_fitted = self.feature_length > 0
    if previously_fitted and hasattr(self.clf, 'partial_fit'):
        # FIXME? check docs, may need to pass class=[...]
        self.clf = self.clf.partial_fit(features, encoded)
        return

    self.clf = self.clf.fit(features, encoded)
    self.feature_length = len(features[0])
text_classifier.py 文件源码 项目:textar 作者: datosgobar 项目源码 文件源码 阅读 23 收藏 0 点赞 0 评论 0
def make_classifier(self, name, ids, labels):
    """Train an SGD linear classifier on the stored texts.

    The classifier is created, stored on the object under the attribute
    ``name`` and fitted on the TF-IDF rows that correspond to ``ids``.

    Args:
        name (str): Attribute name under which the classifier is stored.
        ids (list): N ids of texts already stored in the TextClassifier.
        labels (list): N labels, one for each text id in ``ids``.

    Raises:
        ValueError: If any of ``ids`` is not among the stored ids.

    Note:
        Uses the `Scikit-learn <http://scikit-learn.org/>`_ classifier.
    """
    # np.isin replaces the deprecated np.in1d.
    if not np.isin(ids, self.ids).all():
        # Adjacent literals keep the message free of the run of spaces that
        # a backslash continuation inside the string would embed.
        raise ValueError("Hay ids de textos que no se encuentran "
                         "almacenados.")
    setattr(self, name, SGDClassifier())
    classifier = getattr(self, name)
    # NOTE(review): searchsorted presumably relies on self.ids being sorted
    # -- confirm where self.ids is built.
    indices = np.searchsorted(self.ids, ids)
    classifier.fit(self.tfidf_mat[indices, :], labels)
test_searchgrid.py 文件源码 项目:searchgrid 作者: jnothman 项目源码 文件源码 阅读 26 收藏 0 点赞 0 评论 0
def test_build_param_grid_set_estimator():
    """Check build_param_grid when a pipeline step's grid lists estimators.

    Two of the four candidate estimators carry a grid of their own and two
    do not; the expected grid has one entry per estimator with its own grid
    and a single shared entry for the two without.
    """
    linear_svc = SVC()
    logreg = LogisticRegression()
    poly_svc = SVC()
    sgd = SGDClassifier()

    selector = set_grid(SelectKBest(), k=[2, 3])
    pipeline = Pipeline([('sel', selector), ('clf', None)])
    candidates = [
        set_grid(linear_svc, kernel=['linear']),
        logreg,
        set_grid(poly_svc, kernel=['poly'], degree=[2, 3]),
        sgd,
    ]
    estimator = set_grid(pipeline, clf=candidates)

    expected = [
        {'clf': [linear_svc], 'clf__kernel': ['linear'], 'sel__k': [2, 3]},
        {'clf': [poly_svc], 'clf__kernel': ['poly'],
         'clf__degree': [2, 3], 'sel__k': [2, 3]},
        {'clf': [logreg, sgd], 'sel__k': [2, 3]},
    ]
    assert build_param_grid(estimator) == expected
pipeline_factory.py 文件源码 项目:UrbanSearch 作者: urbansearchTUD 项目源码 文件源码 阅读 27 收藏 0 点赞 0 评论 0
def get_sgdc(self):
    """Build the text pipeline: TF-IDF, percentile selection, SGD classifier.

    Returns:
        Pipeline with the steps ``'tfidf'``, ``'feat_select'`` and ``'clf'``.
    """
    vectorizer = TfidfVectorizer(stop_words=sw.words('dutch'),
                                 norm='l2', use_idf=True)
    selector = SelectPercentile(percentile=10)

    # All SGD settings are spelled out so the configuration is explicit.
    sgd_settings = dict(
        alpha=0.0001,
        average=False,
        class_weight=None,
        epsilon=0.1,
        eta0=0.0,
        fit_intercept=True,
        l1_ratio=0.15,
        learning_rate='optimal',
        loss='log',
        n_iter=10,
        n_jobs=1,
        penalty='l2',
        power_t=0.5,
        random_state=None,
        shuffle=True,
        verbose=0,
        warm_start=False,
    )
    classifier = SGDClassifier(**sgd_settings)

    return Pipeline([
        ('tfidf', vectorizer),
        ('feat_select', selector),
        ('clf', classifier),
    ])
models.py 文件源码 项目:johnson-county-ddj-public 作者: dssg 项目源码 文件源码 阅读 22 收藏 0 点赞 0 评论 0
def run(self):
    """Fit the configured model and collect its test-set outputs.

    Returns:
        tuple: ``(result_dictionary, clf)``. The dictionary holds the ids,
        the hard predictions, a score per test row, the true test labels,
        the run configuration and the feature importances; ``clf`` is the
        fitted estimator.
    """
    training_x, training_y, training_ids = self.get_training_data()
    test_x, test_y, test_ids = self.get_test_data()

    clf = self.define_model(self.model_name, self.model_params)
    clf.fit(training_x, training_y)
    hard_predictions = clf.predict(test_x)

    # Margin-based models are scored with decision_function; everything else
    # with the second column of predict_proba. clf.loss is only read for
    # SGDClassifier, as other estimators may not define it.
    margin_only = self.model_name == "linear.SVC" or (
        self.model_name == "SGDClassifier"
        and clf.loss in ("hinge", "perceptron"))
    if margin_only:
        scores = list(clf.decision_function(test_x))
    else:
        scores = list(clf.predict_proba(test_x)[:, 1])

    feature_importance = self.get_feature_importance(clf, self.model_name)
    result_dictionary = {
        'training_ids': training_ids,
        'predictions_test_y': list(hard_predictions),
        'prob_prediction_test_y': scores,
        'test_y': list(test_y),
        'test_ids': list(test_ids),
        'model_name': self.model_name,
        'model_params': self.model_params,
        'label': self.label,
        'feature_columns_used': self.cols_to_use,
        'config': self.config,
        'feature_importance': feature_importance,
        'columned_used_for_feat_importance': list(training_x.columns.values),
    }
    return result_dictionary, clf
Ensemble.py 文件源码 项目:molearn 作者: jmread 项目源码 文件源码 阅读 32 收藏 0 点赞 0 评论 0
def demo():
    """Fit an Ensemble of RCC models on the XOR toy data and print the result.

    Prints the ensemble's predictions for the training inputs, then the
    true labels for comparison.
    """
    import sys
    # The helper modules are imported from the sibling ``core`` directory.
    sys.path.append('../core')
    from tools import make_XOR_dataset

    X, Y = make_XOR_dataset()
    # Unpacking doubles as a check that Y is two-dimensional.
    N, L = Y.shape

    from sklearn import linear_model
    base_learner = linear_model.SGDClassifier(n_iter=100)
    from CC import RCC
    chain = RCC(h=base_learner)
    ensemble = Ensemble(n_estimators=10, base_estimator=chain)
    ensemble.fit(X, Y)

    # test it
    print(ensemble.predict(X))
    print("vs")
    print(Y)
SentiCR.py 文件源码 项目:SentiCR 作者: senticr 项目源码 文件源码 阅读 29 收藏 0 点赞 0 评论 0
def get_classifier(self):
    """Create a fresh estimator for the algorithm code in ``self.algo``.

    Returns:
        A new, unfitted estimator for one of the codes ``GBT``, ``RF``,
        ``ADB``, ``DT``, ``NB``, ``SGD``, ``SVC`` or ``MLPC``; the integer
        ``0`` for any other code.
    """
    # Factories are lambdas so an estimator class is only looked up and
    # instantiated for the code actually requested.
    factories = {
        "GBT": lambda: GradientBoostingClassifier(),
        "RF": lambda: RandomForestClassifier(),
        "ADB": lambda: AdaBoostClassifier(),
        "DT": lambda: DecisionTreeClassifier(),
        "NB": lambda: BernoulliNB(),
        "SGD": lambda: SGDClassifier(),
        "SVC": lambda: LinearSVC(),
        "MLPC": lambda: MLPClassifier(
            activation='logistic', batch_size='auto',
            early_stopping=True, hidden_layer_sizes=(100,),
            learning_rate='adaptive', learning_rate_init=0.1,
            max_iter=5000, random_state=1, solver='lbfgs', tol=0.0001,
            validation_fraction=0.1, verbose=False, warm_start=False),
    }
    build = factories.get(self.algo)
    if build is None:
        return 0
    return build()
ppi_eval.py 文件源码 项目:GraphSAGE 作者: williamleif 项目源码 文件源码 阅读 26 收藏 0 点赞 0 评论 0
def run_regression(train_embeds, train_labels, test_embeds, test_labels):
    """Print per-label F1 for a logistic SGD model and a dummy baseline.

    One logistic-loss SGD classifier and one DummyClassifier are fitted per
    label column. A micro-averaged F1 score is then printed for every label
    column of the test set, first for the SGD model and then for the
    baseline.

    Args:
        train_embeds: Training feature matrix.
        train_labels: Training labels, one column per label.
        test_embeds: Test feature matrix.
        test_labels: Test labels; must support ``.shape[1]`` and ``[:, i]``.
    """
    np.random.seed(1)
    from sklearn.linear_model import SGDClassifier
    from sklearn.dummy import DummyClassifier
    from sklearn.metrics import f1_score
    from sklearn.multioutput import MultiOutputClassifier
    dummy = MultiOutputClassifier(DummyClassifier())
    dummy.fit(train_embeds, train_labels)
    log = MultiOutputClassifier(SGDClassifier(loss="log"), n_jobs=10)
    log.fit(train_embeds, train_labels)

    n_labels = test_labels.shape[1]

    # Predict once: the fitted SGD model returns the same matrix on every
    # call, so re-predicting the whole test set per label was wasted work.
    log_predictions = log.predict(test_embeds)
    for i in range(n_labels):
        print("F1 score", f1_score(test_labels[:, i], log_predictions[:, i], average="micro"))
    for i in range(n_labels):
        # NOTE(review): kept inside the loop because DummyClassifier's default
        # strategy may draw from the global RNG depending on the scikit-learn
        # version; hoisting could change the printed numbers -- confirm first.
        print("Random baseline F1 score", f1_score(test_labels[:, i], dummy.predict(test_embeds)[:, i], average="micro"))
unit_tests.py 文件源码 项目:banking-class 作者: eli-goodfriend 项目源码 文件源码 阅读 19 收藏 0 点赞 0 评论 0
def test_cat():
    """Run the categorisation flow from test_lookup.csv to test_cat.csv.

    Rows that already have a category train the model; the model is then
    applied to the rows without one, and the recombined frame is written
    out in its original row order.
    """
    # Single-argument print() emits the same text on Python 2 and Python 3.
    print('Testing categorization...')
    filein = 'test_lookup.csv'
    fileout = 'test_cat.csv'
    df = pd.read_csv(filein)

    model = linear_model.SGDClassifier(loss='log')

    has_category = ~df.category.isnull()
    catData = df[has_category]
    uncatData = df[~has_category]
    print(str(float(len(catData))/float(len(df)) * 100.) + "% of transactions categorized with lookup.")

    ts.train_model(catData,model,embeddings,model_type='logreg',new_run=True)
    # NOTE(review): presumably use_model fills in categories on uncatData in
    # place, since its return value is ignored -- confirm in ts.
    ts.use_model(uncatData,model,embeddings,0.0,model_type='logreg')

    df = pd.concat([catData, uncatData])
    df.sort_index(inplace=True)

    df.to_csv(fileout,index=False)
vectorizer.py 文件源码 项目:python-machine-learning-book 作者: jeremyn 项目源码 文件源码 阅读 26 收藏 0 点赞 0 评论 0
def train_and_pickle_classifier():
    """Train a logistic SGD classifier out-of-core and pickle it.

    Up to 45 mini-batches of 1000 documents are streamed from
    ``datasets/movie_data.csv`` and fed to ``partial_fit``. The next 5000
    documents are used to print a test accuracy and are then also learned
    before the classifier is written to ``CLF_FILENAME``.
    """
    import numpy as np
    from sklearn.linear_model import SGDClassifier

    clf = SGDClassifier(loss='log', random_state=1, n_iter=1)

    csv_filename = os.path.join('datasets', 'movie_data.csv')
    doc_stream = stream_docs(path=csv_filename)

    classes = np.array([0, 1])
    for _ in range(45):
        X_train, y_train = get_minibatch(doc_stream, size=1000)
        if X_train is None:
            break
        X_train = vect.transform(X_train)
        clf.partial_fit(X_train, y_train, classes=classes)

    X_test, y_test = get_minibatch(doc_stream, size=5000)
    X_test = vect.transform(X_test)
    print("Test accuracy: %.3f" % clf.score(X_test, y_test))

    # Fold the held-out batch into the model before persisting it.
    clf = clf.partial_fit(X_test, y_test)

    # The with-block closes the file, so the pickle is flushed to disk
    # instead of depending on garbage collection of an anonymous handle.
    with open(CLF_FILENAME, 'wb') as clf_file:
        pickle.dump(clf, clf_file, protocol=4)
ensemble.py 文件源码 项目:IBRel 作者: lasigeBioTM 项目源码 文件源码 阅读 28 收藏 0 点赞 0 评论 0
def __init__(self, path, etype, **kwargs):
    """Initialise the ensemble model and its one-step classifier pipeline.

    Args:
        path: Forwarded to the parent initialiser.
        etype: Forwarded to the parent initialiser as ``etype``.
        **kwargs: Forwarded to the parent initialiser; ``goldstd`` is also
            kept on the instance (``None`` when absent).
    """
    super(EnsembleModel, self).__init__(path, etype=etype, **kwargs)
    self.basedir = "models/ensemble/"
    self.goldstd = kwargs.get("goldstd")
    self.data = dict()
    self.offsets = list()

    # Alternatives tried for the 'clf' step and left disabled: SGDClassifier
    # (default and hinge/l1), svm.NuSVC(nu=0.01), DecisionTreeClassifier
    # (entropy), MultinomialNB, GaussianNB, svm.SVC (rbf and linear) and a
    # constant DummyClassifier.
    forest = RandomForestClassifier(class_weight={False: 1, True: 1},
                                    n_jobs=-1, criterion="entropy",
                                    warm_start=True)
    self.pipeline = Pipeline([('clf', forest)])
scikitre.py 文件源码 项目:IBRel 作者: lasigeBioTM 项目源码 文件源码 阅读 28 收藏 0 点赞 0 评论 0
def __init__(self, corpus, relationtype, modelname="scikit_classifier"):
    """Set up the relation classifier state, data and text pipeline.

    Args:
        corpus: Corpus object; stored and passed to ``generate_data``.
        relationtype: Relation type; used as the pair type and as the
            prefix of the model name.
        modelname: Suffix of the model name.
    """
    super(ScikitRE, self).__init__()
    self.modelname = "{}_{}".format(relationtype, modelname) if False else relationtype + "_" + modelname
    self.relationtype = relationtype
    self.pairtype = relationtype
    self.corpus = corpus
    self.pairs, self.features, self.labels, self.pred = [], [], [], []
    self.clusters = word2vec.load_clusters("corpora/Thaliana/documents-processed-clusters.txt")
    self.posfmeasure = make_scorer(f1_score, average='binary', pos_label=True)
    self.generate_data(corpus, modelname, relationtype)

    # Character n-grams (3 to 20) within word boundaries feed a multinomial
    # naive Bayes model. Disabled alternatives: a word 1-3-gram
    # CountVectorizer, a TfidfTransformer step, and SGDClassifier,
    # svm.NuSVC, RandomForestClassifier or a constant DummyClassifier as
    # the 'clf' step.
    vectorizer = CountVectorizer(analyzer='char_wb', ngram_range=(3,20),
                                 min_df=0.0, max_df=0.7)
    bayes = MultinomialNB(alpha=0.01, fit_prior=False)
    self.text_clf = Pipeline([('vect', vectorizer), ('clf', bayes)])
test_from_model.py 文件源码 项目:Parallel-SGD 作者: angadgill 项目源码 文件源码 阅读 20 收藏 0 点赞 0 评论 0
def test_transform_linear_model():
    """Exercise the deprecated ``transform`` of L1-fitted linear models.

    For every estimator, threshold and dense/sparse input: fit with an L1
    penalty, require ``transform`` to emit a DeprecationWarning and to keep
    no more columns than the input (strictly fewer for the non-SGD
    models), then refit with L2 on the reduced data and require a training
    accuracy above 0.7.
    """
    estimators = (
        LogisticRegression(C=0.1),
        LinearSVC(C=0.01, dual=False),
        SGDClassifier(alpha=0.001, n_iter=50, shuffle=True, random_state=0),
    )
    thresholds = (None, ".09*mean", "1e-5 * median")
    containers = (np.array, sp.csr_matrix)

    for model in estimators:
        model_is_sgd = isinstance(model, SGDClassifier)
        for threshold in thresholds:
            for as_matrix in containers:
                X = as_matrix(data)

                model.set_params(penalty="l1")
                model.fit(X, y)
                X_reduced = assert_warns(
                    DeprecationWarning, model.transform, X, threshold)

                if model_is_sgd:
                    assert_true(X_reduced.shape[1] <= X.shape[1])
                else:
                    assert_less(X_reduced.shape[1], X.shape[1])

                model.set_params(penalty="l2")
                model.fit(X_reduced, y)
                accuracy = np.mean(model.predict(X_reduced) == y)
                assert_greater(accuracy, 0.7)
test_from_model.py 文件源码 项目:Parallel-SGD 作者: angadgill 项目源码 文件源码 阅读 22 收藏 0 点赞 0 评论 0
def test_prefit():
    """
    Test all possible combinations of the prefit parameter.
    """
    sgd = SGDClassifier(alpha=0.1, n_iter=10, shuffle=True, random_state=0)

    # Reference result: let SelectFromModel fit the estimator itself.
    selector = SelectFromModel(sgd)
    selector.fit(data, y)
    expected = selector.transform(data)

    # An estimator fitted beforehand and passed with prefit=True must select
    # the same columns.
    sgd.fit(data, y)
    selector = SelectFromModel(sgd, prefit=True)
    assert_array_equal(selector.transform(data), expected)

    # With prefit=False an already fitted estimator is fitted again and the
    # result is unchanged.
    selector = SelectFromModel(sgd, prefit=False)
    selector.fit(data, y)
    assert_array_equal(selector.transform(data), expected)

    # Calling fit on a prefit=True selector is an error.
    selector = SelectFromModel(sgd, prefit=True)
    assert_raises(ValueError, selector.fit, data, y)
example.py 文件源码 项目:sandbox-learn 作者: pavmav 项目源码 文件源码 阅读 29 收藏 0 点赞 0 评论 0
def score_function(field):
    """Return the ``"Creature"`` entry of the field's stats, or 0 if absent."""
    field_stats = field.get_stats()
    return field_stats["Creature"] if "Creature" in field_stats else 0

# res = modelling.run_simulation(universe, check_stop_function, score_function, verbose=True, times=30)
# print res
# print np.asarray(res).mean()

# random 1000 10 [193, 37, 97, 224, 349, 165, 251, 130, 184, 335]
# SGDClassifier 1000 10 [9, 106, 127, 11, 187, 38, 193, 114, 236, 27]

# random 500 20 [63, 24, 38, 14, 30, 65, 29, 60, 28, 25, 93, 44, 51, 26, 104, 56, 53, 38, 23, 42] mean 45.299999999999997
# SGDClassifier 500 20 [116, 52, 50, 82, 109, 49, 109, 37, 25, 115, 130, 180, 52, 52, 113, 46, 34, 135, 26, 33] mean 77.25

# random 500 20 [71, 24, 57, 56, 34, 14, 75, 66, 41, 56, 29, 69, 30, 72, 40, 57, 49, 24, 41, 48] mean 47.65
# SGDClassifier 500 20 [175, 40, 117, 96, 119, 116, 58, 134, 67, 87, 73, 147, 124, 125, 82, 139, 78, 110, 74, 100] mean 103.05

# random 500 30 [42, 32, 62, 34, 30, 44, 51, 35, 63, 59, 50, 40, 75, 59, 50, 33, 45, 95, 82, 41, 43, 89, 94, 66, 64, 46, 34, 82, 66, 76]
# 56.0666666667
# SGDClassifier 500 30 [62, 85, 72, 42, 17, 48, 74, 53, 42, 73, 57, 29, 82, 51, 80, 84, 86, 73, 51, 36, 85, 85, 46, 59, 68, 33, 44, 38, 62, 26]
# 58.1
mephistopheies.py 文件源码 项目:kaggle-quora-question-pairs 作者: stys 项目源码 文件源码 阅读 21 收藏 0 点赞 0 评论 0
def compute_sgd(data):
    """Fill ``data`` with logit-scale SGD predictions from 10 stratified folds.

    For every fold an elastic-net logistic SGD model is fitted on the first
    index set of the split. Its logit-transformed positive-class
    probabilities are written into ``data['y_train_pred']`` at the second
    index set, and its test-set predictions are appended to
    ``data['y_test_pred']``, which is finally replaced by the per-row mean
    over the folds.

    Args:
        data: Dict with ``X_train``, ``y_train``, ``X_test``, a
            pre-allocated ``y_train_pred`` and a list ``y_test_pred``.

    Returns:
        The same ``data`` dict, updated in place.
    """
    logging.info('Computing SGD')

    fold_count = 10
    splitter = StratifiedKFold(n_splits=fold_count, shuffle=True)
    targets = data['y_train']
    # Only the targets matter for stratification; X is a placeholder.
    placeholder_X = np.zeros(targets.shape[0])

    # {'en__l1_ratio': 0.0001, 'en__alpha': 1e-05}
    sgd_settings = dict(
        loss='log',
        penalty='elasticnet',
        fit_intercept=True,
        n_iter=100,
        shuffle=True,
        n_jobs=-1,
        l1_ratio=0.0001,
        alpha=1e-05,
        class_weight=None,
    )

    folds = splitter.split(placeholder_X, targets)
    for fit_rows, holdout_rows in tqdm_notebook(folds, total=fold_count):
        model = SGDClassifier(**sgd_settings)
        model = model.fit(data['X_train'][fit_rows, :], targets[fit_rows])

        holdout_proba = model.predict_proba(data['X_train'][holdout_rows, :])
        data['y_train_pred'][holdout_rows] = logit(holdout_proba[:, 1])

        test_proba = model.predict_proba(data['X_test'])
        data['y_test_pred'].append(logit(test_proba[:, 1]))

    data['y_test_pred'] = np.array(data['y_test_pred']).T.mean(axis=1)

    return data
util.py 文件源码 项目:code-uai16 作者: thanhan 项目源码 文件源码 阅读 25 收藏 0 点赞 0 评论 0
def classify(n = 50):
    """Fit an L1-penalised logistic SGD classifier on the first ``n`` rows.

    The features come from the module-level ``mat`` and the targets from
    the module-level ``rel``.

    Returns:
        The fitted classifier.
    """
    # Earlier experiments used MultinomialNB(fit_prior=False) and a
    # class-weighted SVC(gamma=2, C=1) instead.
    class_weights = {0.0: 0.022, 1.0: 1.0}
    model = SGDClassifier(loss="log", penalty="l1", class_weight=class_weights)
    model.fit(mat[:n], rel[:n])
    return model
backend.py 文件源码 项目:berlin-devfest-2016-backend 作者: giansegato 项目源码 文件源码 阅读 29 收藏 0 点赞 0 评论 0
def initialModeling(data):
    """Train the global model on ``data`` and record the feature count.

    Sets the module-level ``n`` to the number of feature columns and the
    module-level ``model`` to a logistic SGD classifier fitted on the
    processed data.

    Args:
        data: Raw input handed to ``processData``, which returns ``(X, y)``.
    """
    X, y = processData(data)

    global n
    n = X.shape[1]

    # Single-argument print() emits the same text on Python 2 and Python 3;
    # the doubled spaces reproduce the output of the comma-separated form.
    print("I'm training the model using  {}  samples and  {}  features.\n".format(X.shape[0], n))

    global model
    model = SGDClassifier(loss="log", alpha=100, verbose=1)
    model.fit(X, y)

# 6th: update model
classifier.py 文件源码 项目:TrackToTrip 作者: ruipgil 项目源码 文件源码 阅读 27 收藏 0 点赞 0 评论 0
def __init__(self, classifier=None):
    """Create the classifier wrapper.

    Args:
        classifier: Estimator to wrap. When ``None``, a logistic-loss
            ``SGDClassifier`` with L2 penalty is created.
    """
    # Compare with None instead of truth-testing the estimator: bool() on an
    # object that defines __len__ (scikit-learn pipelines and ensembles do)
    # calls it, which can discard a valid estimator or raise before fitting.
    if classifier is not None:
        self.clf = classifier
    else:
        self.clf = SGDClassifier(loss="log", penalty="l2", shuffle=True, n_iter=2500)
    self.labels = preprocessing.LabelEncoder()
    # -1 marks "not fitted yet".
    self.feature_length = -1
classifier.py 文件源码 项目:TrackToTrip 作者: ruipgil 项目源码 文件源码 阅读 30 收藏 0 点赞 0 评论 0
def predict(self, features, verbose=False):
    """ Probability estimates for each set of features

    See sklearn's SGDClassifier predict and predict_proba methods.

    Args:
        features (:obj:`list` of :obj:`list` of :obj:`float`)
        verbose: Boolean, optional. If true, each result is a dictionary
            whose keys are labels and whose values are the respective
            probabilities. Defaults to False.
    Returns:
        Array of arrays of numbers, or a list of dictionaries if verbose
        is True
    """
    probabilities = self.clf.predict_proba(features)
    if not verbose:
        return probabilities

    # Column i of predict_proba belongs to the i-th encoder class.
    class_names = self.labels.classes_
    return [
        {class_names[column]: value for column, value in enumerate(row)}
        for row in probabilities
    ]
load_feature.py 文件源码 项目:EmotiW-2017-Audio-video-Emotion-Recognition 作者: xujinchang 项目源码 文件源码 阅读 24 收藏 0 点赞 0 评论 0
def do_l2norm(X_data):
    """Return ``X_data`` normalised with the L2 norm."""
    return preprocessing.normalize(X_data, norm='l2')

#svm = SGDClassifier(loss = 'hinge')
#https://ljalphabeta.gitbooks.io/python-/content/kernelsvm.html
load_feature.py 文件源码 项目:EmotiW-2017-Audio-video-Emotion-Recognition 作者: xujinchang 项目源码 文件源码 阅读 29 收藏 0 点赞 0 评论 0
def use_SGD(X_data,y_data):
    """Fit a hinge-loss, L2-penalised SGD classifier and return it."""
    model = SGDClassifier(loss="hinge", penalty="l2")
    model.fit(X_data, y_data)
    return model

# def use_KNN(X_data,y_data):





# def use_RandomForest(X_data,y_data):
test_stochastic_gradient.py 文件源码 项目:dask-ml 作者: dask 项目源码 文件源码 阅读 24 收藏 0 点赞 0 评论 0
def test_basic(self, single_chunk_classification):
    """PartialSGDClassifier.fit must match SGDClassifier.partial_fit.

    Both estimators get the same settings and data; they are compared with
    ``loss_function_`` excluded.
    """
    X, y = single_chunk_classification
    shared_settings = dict(random_state=0, max_iter=1000, tol=1e-3)

    wrapped = lm.PartialSGDClassifier(classes=[0, 1], **shared_settings)
    reference = lm_.SGDClassifier(**shared_settings)

    wrapped.fit(X, y)
    reference.partial_fit(X, y, classes=[0, 1])
    assert_estimator_equal(wrapped, reference, exclude='loss_function_')
test_sgdc_modelmanager.py 文件源码 项目:UrbanSearch 作者: urbansearchTUD 项目源码 文件源码 阅读 24 收藏 0 点赞 0 评论 0
def test_init_no_file():
    """A manager built without a file holds a Pipeline ending in SGDClassifier."""
    manager = sgdc_modelmanager.SGDCModelManager()
    assert isinstance(manager, sgdc_modelmanager.SGDCModelManager)
    pipeline = manager.clf
    assert isinstance(pipeline, Pipeline)
    assert isinstance(pipeline.named_steps['clf'], SGDClassifier)
test_sgdc_modelmanager.py 文件源码 项目:UrbanSearch 作者: urbansearchTUD 项目源码 文件源码 阅读 25 收藏 0 点赞 0 评论 0
def test_init():
    """A manager built from 'sgdcmodel.pickle' is a ModelManager with an SGD pipeline."""
    manager = sgdc_modelmanager.SGDCModelManager('sgdcmodel.pickle')
    assert isinstance(manager, modelmanager.ModelManager)
    pipeline = manager.clf
    assert isinstance(pipeline, Pipeline)
    assert isinstance(pipeline.named_steps['clf'], SGDClassifier)


问题


面经


文章

微信
公众号

扫码关注公众号