def lda(directories):
images = load(directories, True, permute=False)
f = HaarFeature()
x = []
for idx, im in enumerate(images):
print("%d/%d" % (idx, len(images)))
x.append(np.array(f.process(im)))
y_train = [im.label for im in images]
classes = list(set(y_train))
class_to_index = {key: index for index, key in enumerate(classes)}
    labels = np.array([class_to_index[name] for name in y_train])  # integer-encode the class names
clf = ExtraTreesClassifier()
clf = clf.fit(x, labels)
w, h = f.size, f.size
i = 0
filtered = []
for size in f.haarSizes:
for x in range(w - size):
for y in range(h - size):
for haar_type in range(len(f.haars)):
score = clf.feature_importances_[i]
if score > 0.000001:
filtered.append((size, x, y, haar_type, score))
i += 1
sorted_filtered = sorted(filtered, key=lambda tup: tup[4], reverse=True)
    with open("haarImportance.txt", "w") as text_file:
        for k in sorted_filtered:
            # print("[size=%d][x=%d][y=%d][type=%d] \t=> %f" % k)
            text_file.write("[size=%d][x=%d][y=%d][type=%d] \t=> %f\n" % k)
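A vectorized alternative (a sketch, not from the original project): rank every Haar feature at once from the fitted forest instead of walking the nested loops. Here `feature_params` is a hypothetical list holding the (size, x, y, haar_type) tuple for each feature column, in extraction order.

import numpy as np

def rank_haar_features(clf, feature_params, threshold=1e-6):
    # sort all feature importances once, most important first
    importances = clf.feature_importances_
    order = np.argsort(importances)[::-1]
    return [(*feature_params[i], importances[i])
            for i in order if importances[i] > threshold]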
def _load_model(self, model_id):
_, conn = get_engine()
#todo
models = {
'QXgb': QXgb,
'QXgb2': QXgb2,
'Ridge': Ridge,
'RidgeClassifier': RidgeClassifier,
'KNeighborsClassifier': KNeighborsClassifier,
'QAvg': QAvg,
'QRankedAvg': QRankedAvg,
'QRankedByLineAvg': QRankedByLineAvg,
'QStackModel': QStackModel,
'LogisticRegression': LogisticRegression,
'DecisionTreeClassifier': DecisionTreeClassifier,
'QPostProcessingModel': QPostProcessingModel,
'RandomForestClassifier': RandomForestClassifier,
'ExtraTreesClassifier': ExtraTreesClassifier,
'QAvgOneModelData': QAvgOneModelData,
'QNN1': QNN1,
'QNN2': QNN2,
}
res = conn.execute(
"""
select cls, params, descr, predict_fn
from qml_models
where
model_id='{}'
""".format(model_id)
).fetchone()
if not res:
raise Exception('Missing {} model'.format(model_id))
model = models[res['cls']](**json.loads(res['params']))
self.add(model_id, model, res['descr'], res['predict_fn'])
return model
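The query above interpolates model_id with str.format, which is unsafe for untrusted input. A minimal sketch of the same lookup with a bound parameter, assuming get_engine() returns a SQLAlchemy engine/connection (an assumption, not confirmed by the snippet):

from sqlalchemy import text

res = conn.execute(
    text("select cls, params, descr, predict_fn from qml_models where model_id = :mid"),
    {"mid": model_id},
).fetchone()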
def models():
params = {'n_jobs':nthread,'random_state':seed,'class_weight':None}
# extra = ensemble.ExtraTreesClassifier(n_estimators=1000,max_features='auto',criterion= 'entropy',min_samples_split= 2, max_depth= None, min_samples_leaf= 1, **params)
# extra1 = ensemble.ExtraTreesClassifier(n_estimators=1000,max_features=60,criterion= 'gini',min_samples_split= 4, max_depth= 40, min_samples_leaf= 2, **params)
# rf = ensemble.RandomForestClassifier(n_estimators=1000,max_features= 'auto',criterion= 'gini',min_samples_split= 2, max_depth= None, min_samples_leaf= 1, **params)
# rf1 = ensemble.RandomForestClassifier(n_estimators=1000,max_features=60,criterion= 'entropy',min_samples_split= 4, max_depth= 40, min_samples_leaf= 2, **params)
# xgb_binlog = XGBClassifier(objective="binary:logistic" ,max_depth=10, learning_rate=0.01, n_estimators=5,nthread=nthread, seed=seed)
# xgb_reglog = XGBClassifier(objective="reg:logistic", max_depth=10, learning_rate=0.01, n_estimators=5,nthread=nthread, seed=seed)
# xgb_poi = XGBClassifier(objective="count:poisson", max_depth=10, learning_rate=0.01, n_estimators=5,nthread=nthread, seed=seed)
# xgb_reglin = XGBClassifier(objective="reg:linear", max_depth=10, learning_rate=0.01, n_estimators=5,nthread=nthread, seed=seed)
    rf_params = {'n_estimators':850,'max_features':60,'criterion':'entropy','min_samples_split': 4,'max_depth': 40, 'min_samples_leaf': 2, 'n_jobs': -1}  # defined but not referenced while the alternative estimators above stay commented out
clfs = [
# (D1, XGBRegressor(objective="reg:linear", max_depth=6, learning_rate=0.01, subsample=.8, n_estimators=2000,nthread=nthread, seed=seed)),
(D1, XGBClassifier(objective="binary:logistic" ,max_depth=6, learning_rate=0.01, subsample=.8, n_estimators=2000,nthread=nthread, seed=seed)),
# (D1, XGBRegressor(objective="reg:linear", max_depth=5, learning_rate=0.01, subsample=.8, n_estimators=2000,nthread=nthread, seed=seed)),
# (D1,XGBClassifier(objective="binary:logistic", max_depth=5, learning_rate=0.01, subsample=.8, n_estimators=2000,nthread=nthread, seed=seed)),
# (D1, XGBRegressor(objective="reg:linear", max_depth=4, learning_rate=0.01, subsample=.8, n_estimators=2000,nthread=nthread, seed=seed)),
# (D1,XGBClassifier(objective="binary:logistic", max_depth=4, learning_rate=0.01, subsample=.8, n_estimators=2000,nthread=nthread, seed=seed)),
]
for clf in clfs:
yield clf
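A hypothetical driver (not part of the original file) showing how the yielded (dataset, classifier) pairs might be consumed; `train_and_evaluate` is an assumed helper.

for dataset, clf in models():
    train_and_evaluate(dataset, clf)  # hypothetical routine that fits clf on the given dataset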
Source file: User_Interface.py (project: yttresearch-machine-learning-algorithms-analysis, author: gdemos01)
def exportPresentationData(classifier,action):
dir = input('Give Data Directory: ')
if int(classifier)==1:
clf = GradientBoostingClassifier()
classify(dir,clf,action)
elif int(classifier) == 2:
clf = LogisticRegression()
classify(dir,clf,action)
elif int(classifier) == 3:
clf = KNeighborsClassifier(n_neighbors=5)
classify(dir,clf,action)
elif int(classifier) == 4:
clf = DecisionTreeClassifier()
classify(dir,clf,action)
elif int(classifier) == 5:
clf = svm.LinearSVC()
classify_type2(dir,clf,action)
elif int(classifier) == 6:
clf = RandomForestClassifier()
classify(dir,clf,action)
elif int(classifier) == 7:
clf = ExtraTreesClassifier()
classify(dir,clf,action)
elif int(classifier) == 8:
clf = IsolationForest()
classify_type2(dir,clf,action)
elif int(classifier) == 9:
clf = AdaBoostClassifier(n_estimators=100)
classify(dir,clf,action)
elif int(classifier) == 10:
clf = BaggingClassifier(DecisionTreeClassifier())
classify(dir,clf,action)
elif int(classifier) == 11:
clf1 = GradientBoostingClassifier()
clf2 = AdaBoostClassifier()
        clf = VotingClassifier(estimators=[('gbdt', clf1), ('abdt', clf2)], voting='soft')  # labels matched to the estimators: clf1 is gradient boosting, clf2 is AdaBoost
classify(dir,clf,action)
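Example invocation (the action string is hypothetical): classifier "7" selects ExtraTreesClassifier in the branch above, and the prompted directory is passed straight to classify().

exportPresentationData(classifier='7', action='present')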
Source file: Exporter.py (project: yttresearch-machine-learning-algorithms-analysis, author: gdemos01)
def exportPresentationData(classifier,action,dir):
if int(classifier)==1:
clf = GradientBoostingClassifier()
classify(dir,clf,action)
elif int(classifier) == 2:
clf = LogisticRegression()
classify(dir,clf,action)
elif int(classifier) == 3:
clf = KNeighborsClassifier(n_neighbors=5)
classify(dir,clf,action)
elif int(classifier) == 4:
clf = DecisionTreeClassifier()
classify(dir,clf,action)
elif int(classifier) == 5:
clf = svm.LinearSVC()
classify_type2(dir,clf,action)
elif int(classifier) == 6:
clf = RandomForestClassifier()
classify(dir,clf,action)
elif int(classifier) == 7:
clf = ExtraTreesClassifier()
classify(dir,clf,action)
elif int(classifier) == 8:
clf = IsolationForest()
classify_type2(dir,clf,action)
elif int(classifier) == 9:
clf = AdaBoostClassifier(n_estimators=100)
classify(dir,clf,action)
elif int(classifier) == 10:
clf = BaggingClassifier(DecisionTreeClassifier())
classify(dir,clf,action)
elif int(classifier) == 11:
clf1 = GradientBoostingClassifier()
clf2 = AdaBoostClassifier()
        clf = VotingClassifier(estimators=[('gbdt', clf1), ('abdt', clf2)], voting='soft')  # labels matched to the estimators: clf1 is gradient boosting, clf2 is AdaBoost
classify(dir,clf,action)
def learn(x, y, test_x):
cw = {"0":variables.weight_0_rf, "1000":variables.weight_1000_rf, "1500":variables.weight_1500_rf, "2000":variables.weight_2000_rf}
clf = ExtraTreesClassifier(n_jobs = -1,
n_estimators=variables.n_estimators_et,
max_depth=variables.max_depth_et, random_state=0,
min_samples_split=variables.min_samples_split_et,
min_samples_leaf=variables.min_samples_leaf_et,
max_features=variables.max_feature_et,
max_leaf_nodes=variables.max_leaf_nodes_et,
criterion=variables.criterion_et,
min_impurity_split=variables.min_impurity_split_et,
class_weight=variables.cw_et).fit(x, y)
print "n_estimators=", variables.n_estimators_et,
print "max_depth=", variables.max_depth_et,
print "min_samples_split=", variables.min_samples_split_et,
print "min_samples_leaf=", variables.min_samples_leaf_et,
print "max_features=",variables.max_feature_et,
print "max_leaf_nodes=",variables.max_leaf_nodes_et,
print "criterion=",variables.criterion_et,
print "min_impurity_split=",variables.min_impurity_split_et,
print "class_weight=", variables.cw_et
prediction_list = clf.predict(test_x)
prediction_list_prob = clf.predict_proba(test_x)
return prediction_list,prediction_list_prob
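A hypothetical call, assuming the feature matrices are NumPy arrays and the label strings match the keys of cw above:

predictions, probabilities = learn(x_train, y_train, x_test)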
def define_clfs_params(self):
'''
Defines all relevant parameters and classes for classfier objects.
Edit these if you wish to change parameters.
'''
# These are the classifiers
self.clfs = {
'RF': RandomForestClassifier(n_estimators = 50, n_jobs = -1),
'ET': ExtraTreesClassifier(n_estimators = 10, n_jobs = -1, criterion = 'entropy'),
        'AB': AdaBoostClassifier(DecisionTreeClassifier(max_depth = 1), algorithm = "SAMME", n_estimators = 200),  # the base estimator's max_depth must be a single int, not a list
'LR': LogisticRegression(penalty = 'l1', C = 1e5),
'SVM': svm.SVC(kernel = 'linear', probability = True, random_state = 0),
'GB': GradientBoostingClassifier(learning_rate = 0.05, subsample = 0.5, max_depth = 6, n_estimators = 10),
'NB': GaussianNB(),
'DT': DecisionTreeClassifier(),
'SGD': SGDClassifier(loss = 'log', penalty = 'l2'),
'KNN': KNeighborsClassifier(n_neighbors = 3)
}
# These are the parameters which will be run through
self.params = {
'RF':{'n_estimators': [1,10,100,1000], 'max_depth': [10, 15,20,30,40,50,60,70,100], 'max_features': ['sqrt','log2'],'min_samples_split': [2,5,10], 'random_state': [1]},
'LR': {'penalty': ['l1','l2'], 'C': [0.00001,0.0001,0.001,0.01,0.1,1,10], 'random_state': [1]},
'SGD': {'loss': ['log'], 'penalty': ['l2','l1','elasticnet'], 'random_state': [1]},
'ET': {'n_estimators': [1,10,100,1000], 'criterion' : ['gini', 'entropy'], 'max_depth': [1,3,5,10,15], 'max_features': ['sqrt','log2'],'min_samples_split': [2,5,10], 'random_state': [1]},
'AB': {'algorithm': ['SAMME', 'SAMME.R'], 'n_estimators': [1,10,100,1000], 'random_state': [1]},
'GB': {'n_estimators': [1,10,100,1000], 'learning_rate' : [0.001,0.01,0.05,0.1,0.5],'subsample' : [0.1,0.5,1.0], 'max_depth': [1,3,5,10,20,50,100], 'random_state': [1]},
'NB': {},
'DT': {'criterion': ['gini', 'entropy'], 'max_depth': [1,2,15,20,30,40,50], 'max_features': ['sqrt','log2'],'min_samples_split': [2,5,10], 'random_state': [1]},
'SVM' :{'C' :[0.00001,0.0001,0.001,0.01,0.1,1,10],'kernel':['linear'], 'random_state': [1]},
'KNN' :{'n_neighbors': [1,5,10,25,50,100],'weights': ['uniform','distance'],'algorithm': ['auto','ball_tree','kd_tree']}
}
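A minimal sketch (not from the original class) of how the two dictionaries are typically consumed together: each classifier is re-fit once per combination in its parameter grid.

from sklearn.model_selection import ParameterGrid

def iter_configured_clfs(clfs, params):
    # yield each classifier configured with every combination from its grid
    for key, clf in clfs.items():
        for combo in ParameterGrid(params.get(key, {})):
            yield key, clf.set_params(**combo)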
def __init__(self):
params = dict(clf__n_estimators=[80, 120, 150, 170], clf__min_samples_split=[2], clf__max_depth=[None, 40])
class_weights = {'DEV': 1, 'WEB': 2, 'DATA': 4, 'DOCS': 4, 'EDU': 20, 'HW': 15, 'OTHER': 25}
super().__init__(ensemble.ExtraTreesClassifier(class_weight=class_weights), params, 'ExtraTreesClassifier')
def __init__(self, name, kwargs):
from sklearn.ensemble import ExtraTreesClassifier
super(GCExtraTreesClassifier, self).__init__(name, ExtraTreesClassifier, kwargs)
def predictNext(self, stock, pred_date_count, train_batch_size=100, use_NN=True):
    trainX, trainY, trainD = self.getRawByCount(pred_date_count - train_batch_size, pred_date_count)
testX, testY, testD = self.getSingleRaw(pred_date_count)
testX = testX.reshape(1, -1)
# print trainX[0]
# print list(trainX)
sc = StandardScaler()
sc.fit(trainX)
trainX = sc.transform(trainX)
testX = sc.transform(testX)
# trainX = np.delete(trainX,0,axis=1)
# testX = np.delete(testX,0,axis=1)
fs_method = 'RFC'
pred_pro=[1,0]
trainX,testX = featureSelection (trainX, trainY, testX, [], method=fs_method, testmode=False, n_features_to_select=None)
if use_NN:
from Power import NNet
predY = NNet(TrainX=trainX, TrainY=trainY, TestX=testX)
# print predY
# pred_pro=[1,0]
else:
clf = ExtraTreesClassifier(criterion='gini', n_estimators=150, max_features='auto', n_jobs=4, class_weight='balanced')
# clf = DecisionTreeClassifier(class_weight='balanced')
clf.fit(trainX, trainY)
predY = clf.predict(testX)
# pred_pro = (clf.predict_proba(testX) if hasattr(clf, "predict_proba") else clf.decision_function(testX))
return predY[0], pred_pro[0],testY, testD, 1-clf.score(trainX, trainY)
def tree_based_selection(self, data_set, data_target, feature_names):
"""
:param data_set:
:return:
"""
clf = ExtraTreesClassifier()
clf = clf.fit(data_set, data_target)
    print(clf.feature_importances_)
model = SelectFromModel(clf, prefit=True)
feature_set = model.transform(data_set)
fea_index = []
for A_col in np.arange(data_set.shape[1]):
for B_col in np.arange(feature_set.shape[1]):
if (data_set[:, A_col] == feature_set[:, B_col]).all():
fea_index.append(A_col)
check = {}
for i in fea_index:
check[feature_names[i]] = data_set[0][i]
    print(np.array(check))
return feature_set, fea_index
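A shorter way to recover the kept columns (same result as the O(n^2) comparison above), assuming `model` and `data_set` as in the function body: SelectFromModel reports the selected indices directly.

fea_index = model.get_support(indices=True)
feature_set = data_set[:, fea_index]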
def et1(train2, y, test2, v, z):
cname = sys._getframe().f_code.co_name
v[cname], z[cname] = 0, 0
scores = list()
num_seeds = 7
num_splits = 7
base_seed = 13
ss = model_selection.ShuffleSplit(n_splits=num_splits)
for seed in range(base_seed, base_seed + num_seeds):
ss = model_selection.ShuffleSplit(n_splits=num_splits, random_state=seed)
for n, (itrain, ival) in enumerate(ss.split(train2, y)):
reg = ensemble.ExtraTreesClassifier(max_depth=6,
random_state=seed,
n_estimators=500,
n_jobs=-2)
reg.fit(train2[itrain], y[itrain])
p = reg.predict_proba(train2[ival])[:,1]
v.loc[ival, cname] += pconvert(p)
score = metrics.log_loss(y[ival], p)
print(cname, 'seed %d step %d: '%(seed, n+1), score, now())
scores.append(score)
z[cname] += pconvert(reg.predict_proba(test2)[:,1])
cv=np.array(scores)
print(cv, cv.mean(), cv.std())
z[cname] /= num_splits * num_seeds
v[cname] /= num_seeds
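The v and z arguments are not defined in the snippet; they appear to be pandas DataFrames that accumulate out-of-fold and test predictions, one column per model, and pconvert() and now() are helpers defined elsewhere in the project. A minimal setup sketch under those assumptions:

import pandas as pd

v = pd.DataFrame(index=range(len(train2)))  # out-of-fold predictions on train2
z = pd.DataFrame(index=range(len(test2)))   # averaged predictions on test2
et1(train2, y, test2, v, z)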
def et1(train2, y, test2, v, z):
cname = sys._getframe().f_code.co_name
v[cname], z[cname] = 0, 0
scores = list()
num_seeds = 1
num_splits = 3
base_seed = 13
ss = model_selection.ShuffleSplit(n_splits=num_splits)
for seed in range(base_seed, base_seed + num_seeds):
ss = model_selection.ShuffleSplit(n_splits=num_splits, random_state=seed)
for n, (itrain, ival) in enumerate(ss.split(train2, y)):
reg = ensemble.ExtraTreesClassifier(max_depth=7,
random_state=seed,
n_estimators=1500,
n_jobs=-2)
reg.fit(train2[itrain], y[itrain])
p = reg.predict_proba(train2[ival])[:,1]
v.loc[ival, cname] += pconvert(p)
score = metrics.log_loss(y[ival], p)
print(cname, 'seed %d step %d: '%(seed, n+1), score, now())
scores.append(score)
z[cname] += pconvert(reg.predict_proba(test2)[:,1])
cv=np.array(scores)
print(cv, cv.mean(), cv.std())
z[cname] /= num_splits * num_seeds
v[cname] /= num_seeds
def et1(train2, y, test2, v, z):
cname = sys._getframe().f_code.co_name
v[cname], z[cname] = 0, 0
scores = list()
num_seeds = 3
num_splits = 7
base_seed = 13
ss = model_selection.ShuffleSplit(n_splits=num_splits)
for seed in range(base_seed, base_seed + num_seeds):
ss = model_selection.ShuffleSplit(n_splits=num_splits, random_state=seed)
for n, (itrain, ival) in enumerate(ss.split(train2, y)):
reg = ensemble.ExtraTreesClassifier(max_depth=11,
random_state=seed,
n_estimators=1500,
n_jobs=-2)
reg.fit(train2[itrain], y[itrain])
p = reg.predict_proba(train2[ival])[:,1]
v.loc[ival, cname] += pconvert(p)
score = metrics.log_loss(y[ival], p)
print(cname, 'seed %d step %d: '%(seed, n+1), score, now())
scores.append(score)
z[cname] += pconvert(reg.predict_proba(test2)[:,1])
cv=np.array(scores)
print(cv, cv.mean(), cv.std())
z[cname] /= num_splits * num_seeds
v[cname] /= num_seeds
def et1(train2, y, test2, v, z):
cname = sys._getframe().f_code.co_name
v[cname], z[cname] = 0, 0
scores = list()
num_seeds = 2
num_splits = 7
base_seed = 13
ss = model_selection.ShuffleSplit(n_splits=num_splits)
for seed in range(base_seed, base_seed + num_seeds):
ss = model_selection.ShuffleSplit(n_splits=num_splits, random_state=seed)
for n, (itrain, ival) in enumerate(ss.split(train2, y)):
reg = ensemble.ExtraTreesClassifier(max_depth=11,
random_state=seed,
n_estimators=2000,
n_jobs=-2)
reg.fit(train2[itrain], y[itrain])
p = reg.predict_proba(train2[ival])[:,1]
v.loc[ival, cname] += pconvert(p)
score = metrics.log_loss(y[ival], p)
print(cname, 'seed %d step %d: '%(seed, n+1), score, now())
scores.append(score)
z[cname] += pconvert(reg.predict_proba(test2)[:,1])
cv=np.array(scores)
print(cv, cv.mean(), cv.std())
z[cname] /= num_splits * num_seeds
v[cname] /= num_seeds
def et1(train2, y, test2, v, z):
cname = sys._getframe().f_code.co_name
v[cname], z[cname] = 0, 0
scores = list()
num_seeds = 3
num_splits = 5
base_seed = 13
ss = model_selection.ShuffleSplit(n_splits=num_splits)
for seed in range(base_seed, base_seed + num_seeds):
ss = model_selection.ShuffleSplit(n_splits=num_splits, random_state=seed)
for n, (itrain, ival) in enumerate(ss.split(train2, y)):
reg = ensemble.ExtraTreesClassifier(max_depth=15,
random_state=seed,
n_estimators=2500,
n_jobs=-2)
reg.fit(train2[itrain], y[itrain])
p = reg.predict_proba(train2[ival])[:,1]
v.loc[ival, cname] += p
score = metrics.log_loss(y[ival], p)
print(cname, 'seed %d step %d: '%(seed, n+1), score, now())
scores.append(score)
z[cname] += reg.predict_proba(test2)[:,1]
cv=np.array(scores)
print(cv, cv.mean(), cv.std())
z[cname] /= num_splits * num_seeds
v[cname] /= num_seeds
def et1(train2, y, test2, v, z):
cname = sys._getframe().f_code.co_name
v[cname], z[cname] = 0, 0
scores = list()
num_seeds = 3
num_splits = 5
base_seed = 13
ss = model_selection.ShuffleSplit(n_splits=num_splits)
for seed in range(base_seed, base_seed + num_seeds):
ss = model_selection.ShuffleSplit(n_splits=num_splits, random_state=seed)
for n, (itrain, ival) in enumerate(ss.split(train2, y)):
reg = ensemble.ExtraTreesClassifier(max_depth=15,
random_state=seed,
n_estimators=2500,
n_jobs=-2)
reg.fit(train2[itrain], y[itrain])
p = reg.predict_proba(train2[ival])[:,1]
v.loc[ival, cname] += pconvert(p)
score = metrics.log_loss(y[ival], p)
print(cname, 'seed %d step %d: '%(seed, n+1), score, now())
scores.append(score)
z[cname] += pconvert(reg.predict_proba(test2)[:,1])
cv=np.array(scores)
print(cv, cv.mean(), cv.std())
z[cname] /= num_splits * num_seeds
v[cname] /= num_seeds
def et1(train2, y, test2, v, z):
cname = sys._getframe().f_code.co_name
v[cname], z[cname] = 0, 0
scores = list()
num_seeds = 3
num_splits = 5
base_seed = 13
ss = model_selection.ShuffleSplit(n_splits=num_splits)
for seed in range(base_seed, base_seed + num_seeds):
ss = model_selection.ShuffleSplit(n_splits=num_splits, random_state=seed)
for n, (itrain, ival) in enumerate(ss.split(train2, y)):
reg = ensemble.ExtraTreesClassifier(max_depth=15,
random_state=seed,
n_estimators=2500,
n_jobs=-2)
reg.fit(train2[itrain], y[itrain])
p = reg.predict_proba(train2[ival])[:,1]
v.loc[ival, cname] += p
score = metrics.log_loss(y[ival], p)
print(cname, 'seed %d step %d: '%(seed, n+1), score, now())
scores.append(score)
z[cname] += np.log1p(reg.predict_proba(test2)[:,1])
cv=np.array(scores)
print(cv, cv.mean(), cv.std())
z[cname] /= num_splits * num_seeds
v[cname] /= num_seeds
def et1(train2, y, test2, v, z):
cname = sys._getframe().f_code.co_name
v[cname], z[cname] = 0, 0
scores = list()
num_seeds = 7
num_splits = 17
base_seed = 13
ss = model_selection.ShuffleSplit(n_splits=num_splits)
for seed in range(base_seed, base_seed + num_seeds):
ss = model_selection.ShuffleSplit(n_splits=num_splits, random_state=seed)
for n, (itrain, ival) in enumerate(ss.split(train2, y)):
reg = ensemble.ExtraTreesClassifier(max_depth=7,
random_state=seed,
n_estimators=1500,
n_jobs=-2)
reg.fit(train2[itrain], y[itrain])
p = reg.predict_proba(train2[ival])[:,1]
v.loc[ival, cname] += pconvert(p)
score = metrics.log_loss(y[ival], p)
print(cname, 'seed %d step %d: '%(seed, n+1), score, now())
scores.append(score)
z[cname] += pconvert(reg.predict_proba(test2)[:,1])
cv=np.array(scores)
print(cv, cv.mean(), cv.std())
z[cname] /= num_splits * num_seeds
v[cname] /= num_seeds