test_ingestion.py 文件源码-python代码片段

test_ingestion.py 文件源码

python

阅读 24 收藏 0 点赞 0 评论 0

项目：FreeDiscovery 作者: FreeDiscovery 项目源码文件源码

def test_get_feature_extraction(app, hashed, weighting):
    """Check the feature-extraction endpoint response for a cached dataset.

    A dataset is obtained through ``get_features_cached`` with the given
    ``hashed`` / ``weighting`` options and a fixed ``norm_alpha``.  Its
    feature-extraction entry is then fetched with ``app.get_check`` and
    verified in three ways:

    * the type layout of the response (as reported by ``dict2type``)
      matches the expected schema,
    * the options passed in are echoed back unchanged,
    * the vectorizer stored in the cache directory has "hashing" in its
      class name exactly when ``use_hashing`` is ``True``.
    """
    alpha = 0.5
    dataset_id, _, _ = get_features_cached(
        app, hashed=hashed, weighting=weighting, norm_alpha=alpha)

    endpoint = "/feature-extraction/{}".format(dataset_id)
    response = app.get_check(V01 + endpoint)

    # Expected value type for every key of the response.
    expected_schema = {
        'analyzer': 'str',
        'ngram_range': ['int'],
        'stop_words': 'str',
        'n_jobs': 'int',
        'chunk_size': 'int',
        'data_dir': 'str',
        'n_samples': 'int',
        'n_features': 'int',
        'weighting': 'str',
        'norm_alpha': 'float',
        'use_hashing': 'bool',
        'filenames': ['str'],
        'max_df': 'float',
        'min_df': 'float',
        'parse_email_headers': 'bool',
        'n_samples_processed': 'int',
        'preprocess': [],
    }
    assert dict2type(response, collapse_lists=True) == expected_schema

    # The options used to build the dataset must be reported back as-is.
    echoed_options = (
        ('use_hashing', hashed),
        ('weighting', weighting),
        ('norm_alpha', alpha),
    )
    for key, expected_value in echoed_options:
        assert response[key] == expected_value

    # The persisted vectorizer type has to agree with the hashing flag.
    vectorizer_path = os.path.join(
        CACHE_DIR, 'ediscovery_cache', dataset_id, 'vectorizer')
    vectorizer = joblib.load(vectorizer_path)
    is_hashing_vectorizer = 'hashing' in type(vectorizer).__name__.lower()
    assert (response['use_hashing'] is True) == is_hashing_vectorizer