def svmstruct_cv_score(dataset, C, class_weight, constraints,
                       compat_features, second_order_features):
    """Run k-fold cross-validation with ``fit_predict`` and cache the result.

    The result is stored on disk under the file name returned by
    ``cache_fname`` for this exact argument combination. If that file
    already exists it is loaded and returned without refitting anything.

    Parameters
    ----------
    dataset
        Dataset identifier. Selects the loader, the number of folds
        (5 for ``'ukp'``, 3 otherwise) and which second-order factors
        are enabled.
    C, class_weight, constraints, compat_features
        Forwarded unchanged to ``fit_predict``.
    second_order_features
        Forwarded to ``fit_predict``; also switches on coparents for every
        dataset, grandparents only for ``'ukp'`` and siblings only for
        ``'cdcp'``.

    Returns
    -------
    tuple
        ``(scores, all_Y_pred)``: ``scores`` holds one
        ``clf.model._score(Y_val, Y_pred)`` value per fold, and
        ``all_Y_pred`` is the concatenation of the validation predictions
        of all folds, in fold order.
    """
    fn = cache_fname("svmstruct_cv_score", (dataset, C, class_weight,
                                            constraints, compat_features,
                                            second_order_features))
    if os.path.exists(fn):
        logging.info("Cached file already exists.")
        # NOTE(review): dill, like pickle, can execute arbitrary code while
        # loading. Only safe as long as the cache directory is trusted.
        with open(fn, "rb") as f:
            return dill.load(f)

    load, ids = get_dataset_loader(dataset, split="train")
    n_folds = 5 if dataset == 'ukp' else 3

    # Plain boolean logic: each second-order factor type is enabled only
    # when second-order features are requested (and, for two of them, only
    # on one specific dataset).
    grandparents = second_order_features and dataset == 'ukp'
    coparents = second_order_features
    siblings = second_order_features and dataset == 'cdcp'

    scores = []
    all_Y_pred = []
    for tr, val in KFold(n_folds).split(ids):
        train_docs = list(load(ids[tr]))
        val_docs = list(load(ids[val]))
        clf, Y_val, Y_pred = fit_predict(train_docs, val_docs, dataset, C,
                                         class_weight,
                                         constraints, compat_features,
                                         second_order_features, grandparents,
                                         coparents, siblings)
        all_Y_pred.extend(Y_pred)
        scores.append(clf.model._score(Y_val, Y_pred))

    # Dump to a temporary sibling file and rename it into place, so an
    # interrupted run never leaves a truncated cache file that later calls
    # would pick up via the os.path.exists() check above and fail to load.
    tmp_fn = "{}.tmp".format(fn)
    with open(tmp_fn, "wb") as f:
        dill.dump((scores, all_Y_pred), f)
    os.replace(tmp_fn, fn)

    return scores, all_Y_pred
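# 评论列表 (comment list) -- web-page navigation text, not part of the code
# 文章目录 (table of contents) -- web-page navigation text, not part of the code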