def test_single_domain():
    """Check ``train_model`` output when all documents share one domain.

    Ten labeled documents are built, every one with an
    ``http://example.com/`` URL, alternating between relevant and not
    relevant. The test then checks the first two advice entries, the first
    three description entries, and that ``result.model`` is ``None``.
    """
    docs = []
    for idx in range(10):
        docs.append({
            'html': 'foo{} bar'.format(idx % 4),
            'url': 'http://example.com/{}'.format(idx),
            'relevant': idx % 2 == 0,
        })

    result = train_model(docs)
    # Print the whole meta as a plain dict, presumably to aid debugging.
    pprint(attr.asdict(result.meta))

    # Only the leading entries are pinned; anything after them is ignored.
    leading_advice = lst_as_dict(result.meta.advice)[:2]
    assert leading_advice == [
        {
            'kind': 'Warning',
            'text': "Only 1 relevant domain in data means that it's impossible to do "
                    'cross-validation across domains, and will likely result in '
                    'model over-fitting.',
        },
        {
            'kind': 'Warning',
            'text': 'Number of human labeled documents is just 10, consider having '
                    'at least 100 labeled.',
        },
    ]

    leading_description = lst_as_dict(result.meta.description)[:3]
    assert leading_description == [
        {
            'heading': 'Dataset',
            'text': '10 documents, 10 labeled across 1 domain.',
        },
        {
            'heading': 'Class balance',
            'text': '50% relevant, 50% not relevant.',
        },
        {'heading': 'Metrics', 'text': ''},
    ]

    assert result.model is None
评论列表
文章目录