def get_shape_descriptor(self) -> typing.Optional[str]:
    """
    Return a string identifying the shape/structure/format of the data in this bundle
    document, so that it may be indexed appropriately.

    Currently, this returns a string identifying the metadata schema release major number.
    For example:

        v3 - Bundle contains metadata in the version 3 format
        v4 - Bundle contains metadata in the version 4 format
        ...

    This includes verification that schema major number is the same for all index metadata
    files in the bundle, consistent with the current HCA ingest service behavior. If no
    metadata version information is contained in the bundle, None is returned.
    Currently this occurs in the case of the empty bundle used for deployment testing.

    If/when bundle schemas are available, this function should be updated to reflect the
    bundle schema type and major version number.

    Other projects (non-HCA) may manage their metadata schemas (if any) and schema versions.
    This should be an extension point that is customizable by other projects according to
    their metadata.

    :return: "v" followed by the schema major version shared by every file that has a
             'core' section, or None when no file in ``self.files`` has one.
    :raises AssertionError: if the files declare more than one schema major version.
    """
    # Maps each schema major version to the set of schema types that declared it.
    schema_version_map = defaultdict(set)  # type: typing.MutableMapping[str, typing.MutableSet[str]]
    for filename, file_content in self.files.items():
        core = file_content.get('core')
        if core is None:
            self.logger.info("File %s does not contain a 'core' section to identify "
                             "the schema and schema version.", filename)
            continue
        # A 'core' section lacking 'type' or 'schema_version' is not handled here;
        # the lookup error propagates to the caller.
        schema_type = core['type']
        schema_version = core['schema_version']
        # Only the major component (text before the first ".") determines the shape.
        schema_version_major = schema_version.split(".")[0]
        schema_version_map[schema_version_major].add(schema_type)

    if not schema_version_map:
        return None  # No files with schema identifiers were found

    major_versions = sorted(schema_version_map)
    if len(major_versions) != 1:
        # Raised explicitly rather than via an `assert` statement so that the check
        # is still enforced when Python runs with optimizations (-O) enabled.
        raise AssertionError(
            "The bundle contains mixed schema major version numbers: {}".format(major_versions))
    return "v" + major_versions[0]
评论列表
文章目录