modelData.py 文件源码-python代码片段

modelData.py 文件源码

python

阅读 29 收藏 0 点赞 0 评论 0

项目：rdocChallenge 作者: Elyne 项目源码文件源码

def grid_search(estimator, data, featTypes=('BoW',), nFolds=10, random_seed=44, param_grid=()):

    labels = [x.severity for x in data]

    generatePrimaryFeats(data, featTypes)

    featurized = []
    for d in data:
        instance = {}
        for featname, values in d.feats.items():
            # Give each feature a unique name to avoid overwriting features.
            # If e.g. a concept feature has the same name as a bow word, the old code
            # would overwrite one of the features.
            instance.update({"{0}-{1}".format(featname, k): v for k, v in values.items()})

        featurized.append(instance)

    d = DictVectorizer()
    x_train = d.fit_transform(featurized)

    folds = cross_validation.StratifiedKFold(labels, n_folds=nFolds, shuffle=True, random_state=random_seed)
    grid = GridSearchCV(estimator, param_grid=param_grid, scoring="f1", n_jobs=-1, cv=folds)
    fit_grid = grid.fit(x_train, labels)

    print(fit_grid.best_params_)
    return fit_grid.best_params_