def one_set(A, y, cv, final_model, names, feature_names, results_dir, train_func=None, predict_func=None, baseline=None):
    """Cross-validate ``final_model`` and store the resulting record in ``results_dir``.

    The function creates ``results_dir`` if needed, runs
    ``validation.compute_cv``, builds a record with
    ``validation.create_record`` and, when the pipeline has a ``'logitreg'``
    step, attaches a summary of that step under ``result['model']``.
    The record is finally written with ``validation.store_record``.

    Args:
        A: Data handed unchanged to ``validation.compute_cv``.
        y: Targets handed to ``validation.compute_cv`` and
            ``validation.create_record``.
        cv: Cross-validation object handed to both validation helpers.
        final_model: Object exposing ``named_steps`` (pipeline-like).
        names: Handed unchanged to ``validation.create_record``.
        feature_names: Sequence indexed by the indices returned from the
            ``'empty'`` step's ``get_important_indicies()``.
        results_dir: Directory the record is stored in; also used in the
            log messages.
        train_func: Optional callable forwarded to ``validation.compute_cv``.
        predict_func: Optional callable forwarded to ``validation.compute_cv``.
        baseline: Accepted for interface compatibility; not used here.

    Raises:
        OSError: If ``results_dir`` cannot be created and is not already a
            directory.
    """
    log.info("Starting {} analysis.".format(results_dir))

    # Create the storage directory. Attempt the creation and tolerate
    # "already exists" instead of checking first, so a concurrent run that
    # creates the same directory cannot make this one fail.
    try:
        os.makedirs(results_dir)
    except OSError:
        if not os.path.isdir(results_dir):
            raise

    fpr_array, tpr_array, thresh_array, oob_estimates = validation.compute_cv(
        cv, final_model, A, y, train_func, predict_func)

    log.info("Building storage record.")
    result = validation.create_record(
        final_model, y, cv, names, fpr_array, tpr_array, thresh_array, oob_estimates)

    # The model summary is best effort: any failure is logged and the record
    # is still stored, just without the 'model' entry. Only Exception is
    # caught so that KeyboardInterrupt / SystemExit still propagate.
    try:
        if 'logitreg' in final_model.named_steps:
            result['model'] = _summarize_logitreg(final_model, feature_names)
    except Exception:
        tb = traceback.format_exc()
        log.error(tb)

    log.info('Created results.')
    path = validation.store_record(result, results_dir, 'full_time', False)
    log.info('Stored results to directory %s.' % (str(path)))
    log.info("Finished!")


def _summarize_logitreg(final_model, feature_names):
    """Build the summary dict for the ``'logitreg'`` step of ``final_model``.

    Reads ``Cs_``, ``C_``, ``coef_`` and ``intercept_`` from the step and
    pairs each coefficient with the index reported by the ``'empty'`` step's
    ``get_important_indicies()``. Coefficients are listed by decreasing
    absolute value; the listing stops at the first coefficient whose
    magnitude is below ``1e-6`` times the largest magnitude.

    Returns:
        dict with keys ``'lambda'``, ``'lambda_best'``, ``'type'``, ``'nnz'``,
        ``'weights'`` (list of ``{'name', 'value'}`` dicts) and ``'offset'``.
    """
    logitreg = final_model.named_steps['logitreg']

    summary = {}
    summary['lambda'] = (1.0 / logitreg.Cs_).tolist()
    summary['lambda_best'] = (1.0 / logitreg.C_).tolist()[0]

    # Map each coefficient back to its feature index via the 'empty' step.
    valid_idx = final_model.named_steps['empty'].get_important_indicies()
    ordered = sorted(zip(valid_idx, logitreg.coef_.ravel()),
                     key=lambda pair: -np.abs(pair[1]))

    weights = []
    if ordered:  # no coefficients -> empty weight list rather than IndexError
        # Relative cut-off, computed once: six orders of magnitude below the
        # largest coefficient.
        cutoff = np.abs(ordered[0][1]) * 1.e-6
        for idx, value in ordered:
            if cutoff > np.abs(value):
                # Sorted by magnitude, so everything after this is smaller.
                break
            weights.append({'name': feature_names[idx], 'value': value})

    summary['type'] = 'LogisticRegressionCV'
    summary['nnz'] = len(weights)
    summary['weights'] = weights
    summary['offset'] = logitreg.intercept_[0]
    return summary
评论列表
文章目录