def get_model():
if FLAGS.model == 'logistic':
return linear_model.LogisticRegressionCV(class_weight='balanced',
scoring='roc_auc',
n_jobs=FLAGS.n_jobs,
max_iter=10000, verbose=1)
elif FLAGS.model == 'random_forest':
return ensemble.RandomForestClassifier(n_estimators=100,
n_jobs=FLAGS.n_jobs,
class_weight='balanced',
verbose=1)
elif FLAGS.model == 'svm':
        return model_selection.GridSearchCV(
            estimator=svm.SVC(kernel='rbf', gamma='auto',
                              class_weight='balanced'),
            param_grid={'C': np.logspace(-4, 4, 10)}, scoring='roc_auc',
            n_jobs=FLAGS.n_jobs, verbose=1)
else:
raise ValueError('Unrecognized model %s' % FLAGS.model)
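
# A minimal way to exercise get_model() above, assuming the snippet's usual
# imports (numpy as np; linear_model, ensemble, svm, model_selection from
# sklearn). FLAGS is stubbed with a SimpleNamespace for illustration; the
# real code presumably uses absl-style flags.
from types import SimpleNamespace
from sklearn.datasets import make_classification

FLAGS = SimpleNamespace(model='logistic', n_jobs=1)  # stand-in for real flags
X, y = make_classification(n_samples=200, n_features=10, random_state=0)
clf = get_model()
clf.fit(X, y)
print(clf.score(X, y))  # with scoring='roc_auc', this reports training ROC AUC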
def init_clf(clf_used, params=None):
if params is not None:
params_used = params
elif clf_used == 'svm':
params_used = svm_params
    elif clf_used == 'ada_boost':
        # NOTE: there is no dedicated AdaBoost params dict; rf_params is
        # reused here, so its keys must also be valid AdaBoostClassifier
        # arguments.
        params_used = rf_params
elif clf_used == 'lr':
params_used = lr_params
else:
params_used = rf_params
if clf_used == 'svm':
clf = SVC(**params_used)
elif clf_used == 'ada_boost':
rf = RandomForestClassifier(**rf_params)
clf = AdaBoostClassifier(base_estimator=rf, **params_used)
elif clf_used == 'lr':
clf = LogisticRegressionCV(**params_used)
else:
clf = RandomForestClassifier(**params_used)
return clf
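
# Hypothetical usage of init_clf. The module-level dicts (svm_params,
# rf_params, lr_params) are assumed by the function above; the values below
# are illustrative stand-ins, not the project's actual settings.
lr_params = {'Cs': 10, 'cv': 5, 'class_weight': 'balanced'}
rf_params = {'n_estimators': 200, 'n_jobs': -1}
svm_params = {'C': 1.0, 'kernel': 'rbf', 'probability': True}

clf_lr = init_clf('lr')                        # falls back to lr_params
clf_svm = init_clf('svm', params={'C': 10.0})  # explicit params take priority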
def train_lr(densities_pos, densities_neg, uncerts_pos, uncerts_neg):
    """
    Fit a logistic regression that fuses density and uncertainty scores
    into a single detector.
    :param densities_pos: density scores of the positive examples
    :param densities_neg: density scores of the negative examples
    :param uncerts_pos: uncertainty scores of the positive examples
    :param uncerts_neg: uncertainty scores of the negative examples
    :return: stacked feature matrix, 0/1 labels, fitted LogisticRegressionCV
    """
values_neg = np.concatenate(
(densities_neg.reshape((1, -1)),
uncerts_neg.reshape((1, -1))),
axis=0).transpose([1, 0])
values_pos = np.concatenate(
(densities_pos.reshape((1, -1)),
uncerts_pos.reshape((1, -1))),
axis=0).transpose([1, 0])
values = np.concatenate((values_neg, values_pos))
labels = np.concatenate(
(np.zeros_like(densities_neg), np.ones_like(densities_pos)))
lr = LogisticRegressionCV(n_jobs=-1).fit(values, labels)
return values, labels, lr
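
# Synthetic smoke test for train_lr (illustrative data only): positive and
# negative examples get shifted Gaussian density/uncertainty scores, so the
# fused logistic-regression detector should separate them well.
import numpy as np

rng = np.random.RandomState(0)
d_pos, d_neg = rng.normal(1.0, 1.0, 500), rng.normal(-1.0, 1.0, 500)
u_pos, u_neg = rng.normal(0.5, 1.0, 500), rng.normal(-0.5, 1.0, 500)
values, labels, lr = train_lr(d_pos, d_neg, u_pos, u_neg)
print(lr.score(values, labels))  # training accuracy of the fused detector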
def predict(self, X):
    self.sents_test = X
    self.sents_all = self.sents_train + self.sents_test
    if self.sents_shuffle:
        # Shuffle the sentence order and remember the inverse permutation.
        s_indexs = list(range(len(self.sents_all)))
        random.shuffle(s_indexs)
        s_invers_indexs = list(range(len(s_indexs)))
        for n in range(len(s_indexs)):
            s_invers_indexs[s_indexs[n]] = n
        sents_all = [self.sents_all[n] for n in s_indexs]
    else:
        sents_all = self.sents_all
    all_docs = list(LabeledListSentence(sents_all))
    self.doc2vec_set(all_docs)
    # print('size', self.doc2vec.vector_size)
    self.X_train = [self.doc2vec.infer_vector(s) for s in self.sents_train]
    self.X_test = [self.doc2vec.infer_vector(s) for s in self.sents_test]
    self.logistic = LogisticRegressionCV(class_weight='balanced')  # , n_jobs=-1
    self.logistic.fit(self.X_train, self.Y_train)
    Y_test_predict = self.logistic.predict(self.X_test)
    return Y_test_predict
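
# Note: gensim's Doc2Vec.infer_vector is stochastic, so the inferred train and
# test vectors above can change between runs; fixing the Doc2Vec model's seed
# (and, for full determinism, using a single worker thread) makes them
# reproducible.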
def __init__(self, data, N_i, N_c, *args, **kwargs):
    """ Fit a random forest model to a Dataset object.
        N_i, N_c: parameters defining allowed time windows. See the
        transform_X method.
        args, kwargs: passed to the RandomForestClassifier constructor.
    """
    Wrapper.__init__(self, data, N_i, N_c)
    kwargs['n_estimators'] = 128
    self.classifier = RandomForestClassifier(*args, **kwargs)
    self.classifier.fit(self.fit_X, self.fit_y)
def __init__(self, data, N_i, N_c, *args, **kwargs):
    """ Fit a random forest model to a Dataset object.
        N_i, N_c: parameters defining allowed time windows. See the
        transform_X method.
        args, kwargs: passed to the RandomForestClassifier constructor.
    """
    Wrapper.__init__(self, data, N_i, N_c)
    kwargs['n_estimators'] = 1024
    self.classifier = RandomForestClassifier(*args, **kwargs)
    self.classifier.fit(self.fit_X, self.fit_y)
def __init__(self, data, N_i, N_c, *args, **kwargs):
    """ Fit a random forest model to a Dataset object.
        N_i, N_c: parameters defining allowed time windows. See the
        transform_X method.
        args, kwargs: passed to the RandomForestClassifier constructor.
    """
    Wrapper.__init__(self, data, N_i, N_c)
    kwargs['n_estimators'] = 32768
    self.classifier = RandomForestClassifier(*args, **kwargs)
    self.classifier.fit(self.fit_X, self.fit_y)
def __init__(self, data, N_i, N_c, *args, **kwargs):
    """ Fit a regularized logistic regression model to a Dataset object.
        By default, uses L1 regularization with the strength chosen from
        10 options spaced logarithmically between 1e-4 and 1e4
        (the sklearn LogisticRegressionCV default) using
        min(10, data.n_subjects) folds of cross-validation, but other
        options may be chosen by specifying arguments to the
        LogisticRegressionCV constructor through *args and **kwargs.
        N_i, N_c: parameters defining allowed time windows. See the
        transform_X method.
        args, kwargs: passed to the LogisticRegressionCV constructor.
    """
    Wrapper.__init__(self, data, N_i, N_c)
    default_folds = min(10, data.n_subjects)
    default_classifier_arguments = {
        'cv': default_folds,
        'solver': 'liblinear',
        'penalty': 'l1',
    }
    # Update with the arguments passed in by the user, clobbering
    # the default settings if alternate values are provided.
    default_classifier_arguments.update(kwargs)
    self.classifier = LogisticRegressionCV(
        *args,
        **default_classifier_arguments
    )
    self.classifier.fit(self.fit_X, self.fit_y)
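
# The default-clobbering pattern above, in isolation: user-supplied kwargs
# overwrite the wrapper's defaults before reaching LogisticRegressionCV.
# A minimal sketch with hypothetical values:
from sklearn.linear_model import LogisticRegressionCV

defaults = {'cv': 5, 'solver': 'liblinear', 'penalty': 'l1'}
user_kwargs = {'cv': 3, 'Cs': 20}  # as passed through **kwargs
defaults.update(user_kwargs)       # user values win on key collisions
clf = LogisticRegressionCV(**defaults)
print(clf.cv, clf.Cs)              # -> 3 20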
def logistic_fidelity(self):
    # Group data and assign state labels.
    gnd_features = np.hstack([np.real(self.ground_data.T),
                              np.imag(self.ground_data.T)])
    ex_features = np.hstack([np.real(self.excited_data.T),
                             np.imag(self.excited_data.T)])
    # liblinear wants arrays in C order
    features = np.ascontiguousarray(np.vstack([gnd_features, ex_features]))
    state = np.ascontiguousarray(np.hstack([np.zeros(self.ground_data.shape[1]),
                                            np.ones(self.excited_data.shape[1])]))
    # Set up logistic regression with cross-validation using liblinear.
    # Cs is the grid of candidate inverse regularization strengths; the best
    # value is chosen through cross-validation, using the default stratified
    # K-folds CV generator with 3 folds.
    # This is set up to be as consistent with the MATLAB implementation
    # as I can make it. --GJR
    Cs = np.logspace(-1, 2, 5)
    logreg = LogisticRegressionCV(Cs, cv=3, solver='liblinear')
    logreg.fit(features, state)  # fit the model
    predictions = logreg.predict(features)  # in-sample classification
    score = logreg.score(features, state)  # mean classification accuracy
    N = len(predictions)
    S = np.sum(predictions == state)  # how many we got right
    # Now calculate confidence intervals.
    c = 0.95
    flo = betaincinv(S + 1, N - S + 1, (1 - c) / 2.)
    fhi = betaincinv(S + 1, N - S + 1, (1 + c) / 2.)
    logger.info(("In-place logistic regression fidelity: " +
                 "{:.2f}% ({:.2f}, {:.2f})".format(100 * score, 100 * flo, 100 * fhi)))
def build_logistic_RegressionCV(x_train, y_train):
    lr_cv_model = LogisticRegressionCV(n_jobs=-1, random_state=42, Cs=3, cv=10,
                                       refit=True, class_weight="balanced")
lr_cv_model.fit(x_train, y_train)
return lr_cv_model
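
# Hypothetical call on synthetic data; C_ holds the inverse regularization
# strength picked by the 10-fold search over the Cs=3 candidate values.
from sklearn.datasets import make_classification

x_tr, y_tr = make_classification(n_samples=300, n_features=10, random_state=42)
model = build_logistic_RegressionCV(x_tr, y_tr)
print(model.C_)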
def feature_selection_logit(xtr, ytr):
model = LogisticRegressionCV(penalty='l1', solver='liblinear', cv=5)
model.fit(xtr, ytr)
columns = np.arange(xtr.shape[1])[~np.isclose(model.coef_.ravel(), 0)]
return columns
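
# Demonstration of feature_selection_logit on synthetic data where only 3 of
# 20 features are informative; the L1 penalty should zero out most of the
# uninformative coefficients, so `kept` stays small.
import numpy as np
from sklearn.datasets import make_classification

X, y = make_classification(n_samples=500, n_features=20, n_informative=3,
                           n_redundant=0, random_state=0)
kept = feature_selection_logit(X, y)
print(kept)  # column indices with non-zero coefficients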
def sklearn_logit(self, Xtrain, ytrain, Xtest, ytest):
    clf = linear_model.LogisticRegressionCV(penalty='l2', class_weight='balanced',
                                            intercept_scaling=1e3, cv=5)
    clf.fit(Xtrain, ytrain)
    coefficients = clf.coef_
    print("coefficients:", coefficients)
    print("intercept:", clf.intercept_)
    # predict train labels
    train_predictions = clf.predict(Xtrain)
    train_accuracy = self.calculate_accuracy(train_predictions, ytrain)
    print("train accuracy: ", train_accuracy * 100)
    MSE_train = self.calculate_MSE(train_predictions, ytrain)
    print("train MSE: ", MSE_train)
    p = Xtrain.shape[1]  # number of predictors, used in the AIC penalty term
    AIC_train = len(ytrain) * np.log(MSE_train) + 2 * (p + 1)
    print("train AIC:", AIC_train)
    for i in range(len(train_predictions)):
        train_predictions[i] = round(train_predictions[i])
    train_confMatrix = confusion_matrix(ytrain, train_predictions, labels=[1.0, 0.0])
    print("train confusion matrix:", train_confMatrix)
    # predict test labels
    test_predictions = clf.predict(Xtest)
    test_accuracy = self.calculate_accuracy(test_predictions, ytest)
    print("test accuracy: ", test_accuracy * 100)
    MSE_test = self.calculate_MSE(test_predictions, ytest)
    print("test MSE: ", MSE_test)
    for i in range(len(test_predictions)):
        test_predictions[i] = round(test_predictions[i])
    test_confMatrix = confusion_matrix(ytest, test_predictions, labels=[1.0, 0.0])
    print("test confusion matrix:", test_confMatrix)
def __init__(self, bootstrap_fraction, random_seed=None, feature_importance_metric=None, feature_importance_threshold=None, **kwargs):
self.Cs = kwargs.get('Cs', 10)
self.fit_intercept = kwargs.get('fit_intercept', True)
self.cv = kwargs.get('cv', None)
self.dual = kwargs.get('dual', False)
self.scoring = kwargs.get('scoring', None)
self.tol = kwargs.get('tol', 1e-4)
self.max_iter = kwargs.get('max_iter', 100)
self.class_weight = kwargs.get('class_weight', None)
self.n_jobs = kwargs.get('n_jobs', 1)
self.verbose = kwargs.get('verbose', 0)
self.refit = kwargs.get('refit', True)
self.intercept_scaling = kwargs.get('intercept_scaling', 1.0)
self.multi_class = kwargs.get('multi_class', 'ovr')
self.random_state = kwargs.get('random_state', None)
    # The penalty and solver are fixed rather than taken from kwargs because
    # Bolasso relies on L1 sparsity in the final feature set, and the
    # liblinear solver is required for the 'l1' penalty.
self.logit = LogisticRegressionCV(
Cs=self.Cs,
fit_intercept=self.fit_intercept,
cv=self.cv,
dual=self.dual,
penalty='l1',
scoring=self.scoring,
solver='liblinear',
tol=self.tol,
max_iter=self.max_iter,
class_weight=self.class_weight,
n_jobs=self.n_jobs,
verbose=self.verbose,
refit=self.refit,
intercept_scaling=self.intercept_scaling,
multi_class=self.multi_class,
random_state=self.random_state
)
    super(Bolasso, self).__init__(
        bootstrap_fraction, self.logit, random_seed=random_seed,
        feature_importance_metric=feature_importance_metric,
        feature_importance_threshold=feature_importance_threshold)
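
# For context, the Bolasso scheme this estimator feeds (Bach, 2008): fit the
# L1 model on bootstrap resamples and keep only the features selected in
# (almost) every resample. A minimal standalone sketch, independent of the
# wrapper above; the name and defaults are illustrative.
import numpy as np
from sklearn.linear_model import LogisticRegressionCV

def bolasso_support(X, y, n_boot=32, keep_frac=1.0, seed=0):
    rng = np.random.RandomState(seed)
    n_samples, n_features = X.shape
    counts = np.zeros(n_features)
    for _ in range(n_boot):
        idx = rng.choice(n_samples, size=n_samples, replace=True)  # bootstrap
        clf = LogisticRegressionCV(penalty='l1', solver='liblinear')
        clf.fit(X[idx], y[idx])
        counts += (~np.isclose(clf.coef_.ravel(), 0)).astype(float)
    # keep features whose coefficient was non-zero in >= keep_frac of resamples
    return np.where(counts / n_boot >= keep_frac)[0]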
def one_set(A, y, cv, final_model, names, feature_names, results_dir, train_func=None, predict_func=None, baseline=None):
log.info("Starting {} analysis.".format(results_dir))
    # create the storage directory if it does not exist yet
    if not os.path.exists(results_dir):
        os.makedirs(results_dir)
fpr_array, tpr_array, thresh_array, oob_estimates = validation.compute_cv(cv, final_model, A, y, train_func, predict_func)
log.info("Building storage record.")
result = validation.create_record(final_model, y, cv, names, fpr_array, tpr_array, thresh_array, oob_estimates)
    try:
        # If the pipeline has a logistic regression step, record its
        # regularization path and feature weights.
        if 'logitreg' in final_model.named_steps:
            logitreg = final_model.named_steps['logitreg']
            logit_out = {}
            logit_out['lambda'] = (1.0 / logitreg.Cs_).tolist()
            logit_out['lambda_best'] = (1.0 / logitreg.C_).tolist()[0]
            # Map the coefficients back to the columns kept by the 'empty' step.
            valid_idx = final_model.named_steps['empty'].get_important_indicies()
            ordered = zip(valid_idx, logitreg.coef_.ravel())
            ordered = sorted(ordered, key=lambda o: -np.abs(o[1]))
            out_dict = []
            max_value = np.abs(ordered[0][1])
            for idx, value in ordered:
                # Drop weights that are negligible relative to the largest one.
                if max_value * 1.e-6 > np.abs(value):
                    break
                out_dict.append({'name': feature_names[idx], 'value': value})
            logit_out['type'] = 'LogisticRegressionCV'
            logit_out['nnz'] = len(out_dict)
            logit_out['weights'] = out_dict
            logit_out['offset'] = logitreg.intercept_[0]
            # store the result
            result['model'] = logit_out
    except Exception:
        tb = traceback.format_exc()
        log.error(tb)
log.info('Created results.')
path = validation.store_record(result, results_dir, 'full_time', False)
log.info('Stored results to directory %s.' % (str(path)))
log.info("Finished!")