From 3896ba58864adf6efee0598e1423b2b303cc8aa8 Mon Sep 17 00:00:00 2001 From: Ben Dichter Date: Fri, 19 Apr 2024 09:11:24 -0400 Subject: [PATCH] [INFRA] Introduce metaschema (#1787) * add metaschema and code to validate it. * WIP metaschema and code to validate * update schema * objects finished * add rules * add checks * add rules.files.common * add files.common.core and files.common.tables * add rules.files.deriv * dereferencing working * checkpoint * checkpoint * add rules.sidecars * sidecar derivatives checkpoint * tabular_data checkpoint * DONE! * DONE! * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * fix typo * blackify * include references to draft7 metaschema * move metaschema * add tests * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * move things around * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Update tools/schemacode/bidsschematools/validate_schema.py Co-authored-by: Taylor Salo * fix typo * add jsonschema to deps * fixing up checks * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * install jsonschema types * fix pathing * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * flake8 * try changing schema path * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * fix paths * use existing functions * convert from namespace * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * update fields for existing loader * fix required props * ENH: Link metaschema into package data * RF: Use package data instead of relying on CWD * STY: Import from top-level jsonschema * PY38: importlib_resources * use "deprecated" instead of "DEPRECATED" * move to schema.py and test_schema.py * [pre-commit.ci] 
auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Update src/schema/objects/common_principles.yaml * Update README.md --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Taylor Salo Co-authored-by: Chris Markiewicz --- .pre-commit-config.yaml | 1 + src/metaschema.json | 727 ++++++++++++++++++ src/schema/README.md | 8 + src/schema/rules/sidecars/mri.yaml | 2 +- .../bidsschematools/data/metaschema.json | 1 + tools/schemacode/bidsschematools/schema.py | 28 + .../bidsschematools/tests/test_schema.py | 28 + tools/schemacode/setup.cfg | 2 + 8 files changed, 796 insertions(+), 1 deletion(-) create mode 100644 src/metaschema.json create mode 120000 tools/schemacode/bidsschematools/data/metaschema.json diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 0d2c0ed0fa..b2f40321ca 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -68,5 +68,6 @@ repos: - pytest - types-PyYAML - types-tabulate + - types-jsonschema args: ["tools/schemacode/bidsschematools"] pass_filenames: false diff --git a/src/metaschema.json b/src/metaschema.json new file mode 100644 index 0000000000..129e3f1c5d --- /dev/null +++ b/src/metaschema.json @@ -0,0 +1,727 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "properties": { + "meta": { + "type": "object", + "properties": { + "associations": { + "type": "object", + "patternProperties": + { + "^[a-zA-Z0-9_]+$": { + "type": "object", + "properties": { + "selectors": { + "type": "array", + "items": { + "type": "string" + } + }, + "target": { + "type": "object", + "properties": { + "suffix": { + "type": "string" + }, + "extension": { + "anyOf": [ + { + "type": "string" + }, { + "type": "array", + "items": { + "type": "string" + } + } + ] + } + }, + "required": ["extension"], + "additionalProperties": false + }, + "inherit": { + "type": "boolean" + } + }, + "required": ["target"], + 
"additionalProperties": false + } + }, + "additionalProperties": false + }, + "context": { + "type": "object" + }, + "expression_tests": { + "type": "array", + "items": { + "type": "object", + "properties": { + "expression": { + "type": "string" + }, + "result": {} + }, + "required": ["expression", "result"], + "additionalProperties": false + } + }, + "versions": { + "type": "array", + "items": { + "pattern": "^[0-9]+\\.[0-9]+\\.[0-9]+$" + } + } + }, + "required": ["associations", "context", "expression_tests", "versions"] + }, + "objects": { + "type": "object", + "properties": { + "columns": { + "type": "object", + "patternProperties": { + "^[a-zA-Z0-9_]+$": { + "type": "object", + "properties": { + "name": { + "type": "string" + }, + "display_name": { + "type": "string" + }, + "description": { + "type": "string" + }, + "type": { + "$ref": "http://json-schema.org/draft-07/schema#/properties/type" + }, + "format": { + "type": "string" + }, + "pattern": { + "type": "string" + }, + "unit": { + "type": "string" + }, + "enum": { + "type": "array" + }, + "anyOf": { + "$ref": "http://json-schema.org/draft-07/schema#/properties/anyOf" + }, + "maximum": { + "type": "number" + }, + "minimum": { + "type": "number" + } + }, + "required": ["name", "display_name"], + "additionalProperties": false + } + }, + "additionalProperties": false + }, + "common_principles": { + "type": "object", + "patternProperties": { + "^[a-zA-Z0-9_]+$": { + "type": "object", + "properties": { + "display_name": { + "type": "string" + }, + "description": { + "type": "string" + } + }, + "required": ["display_name", "description"], + "additionalProperties": false + }, + "additionalProperties": false + } + }, + "datatypes": { + "type": "object", + "patternProperties": { + "^[a-zA-Z0-9_]+$": { + "type": "object", + "properties": { + "value": { + "type": "string" + }, + "display_name": { + "type": "string" + }, + "description": { + "type": "string" + } + }, + "required": ["value", "display_name", 
"description"], + "additionalProperties": false + } + }, + "additionalProperties": false + }, + "entities": { + "type": "object", + "patternProperties": { + "^[a-zA-Z0-9_]+$": { + "type": "object", + "properties": { + "name": { + "type": "string" + }, + "display_name": { + "type": "string" + }, + "description": { + "type": "string" + }, + "type": { + "$ref": "http://json-schema.org/draft-07/schema#/properties/type" + }, + "format": { + "type": "string" + }, + "enum": { + "type": "array" + } + }, + "required": ["name", "display_name", "description", "type", "format"], + "additionalProperties": false + } + }, + "additionalProperties": false + }, + "enums": { + "type": "object", + "patternProperties": { + "^[a-zA-Z0-9_-]+$": { + "type": "object", + "properties": { + "type": { + "$ref": "http://json-schema.org/draft-07/schema#/properties/type" + }, + "enum": { + "type": "array" + }, + "value": { + "type": "string" + }, + "display_name": { + "type": "string" + }, + "description": { + "type": "string" + }, + "tags": { + "type": "array" + } + }, + "additionalProperties": false + } + }, + "additionalProperties": false + }, + "extensions": { + "type": "object", + "patternProperties": { + "^[a-zA-Z0-9_]+$": { + "type": "object", + "properties": { + "value": { + "type": "string" + }, + "display_name": { + "type": "string" + }, + "description": { + "type": "string" + } + }, + "required": ["display_name", "description", "value"], + "additionalProperties": false + } + }, + "additionalProperties": false + }, + "files": { + "type": "object", + "patternProperties": { + "^[a-zA-Z0-9_]+$": { + "type": "object", + "properties": { + "display_name": { + "type": "string" + }, + "file_type": { + "type": "string" + }, + "description": { + "type": "string" + } + }, + "required": ["display_name", "file_type", "description"], + "additionalProperties": false + } + }, + "additionalProperties": false + }, + "formats": { + "type": "object", + "patternProperties": { + "^[a-zA-Z0-9_]+$": { + 
"type": "object", + "properties": { + "display_name": { + "type": "string" + }, + "description": { + "type": "string" + }, + "pattern": { + "format": "regex" + } + }, + "required": ["display_name", "description", "pattern"], + "additionalProperties": false + } + }, + "additionalProperties": false + }, + "metadata": { + "type": "object", + "patternProperties": { + "^[a-zA-Z0-9_]+$": { + "type": "object", + "properties": { + "name": { + "type": "string" + }, + "display_name": { + "type": "string" + }, + "description": { + "type": "string" + }, + "type": { + "$ref": "http://json-schema.org/draft-07/schema#/properties/type" + } + }, + "required": ["name","display_name", "description"] + } + }, + "additionalProperties": false + }, + "modalities": { + "type": "object", + "patternProperties": { + "^[a-z]+$": { + "type": "object", + "properties": { + "display_name": { + "type": "string" + }, + "description": { + "type": "string" + } + }, + "required": ["display_name", "description"], + "additionalProperties": false + } + }, + "additionalProperties": false + }, + "suffixes": { + "type": "object", + "patternProperties": { + "^[a-zA-Z0-9_]+$": { + "type": "object", + "properties": { + "value": { + "type": "string" + }, + "display_name": { + "type": "string" + }, + "description": { + "type": "string" + } + }, + "required": ["value", "display_name", "description"] + }, + "additionalProperties": false + } + } + }, + "required": ["columns", "common_principles", "datatypes", "entities", "enums", "extensions", "files", "formats", "metadata", "modalities", "suffixes"], + "additionalProperties": false + }, + "rules": { + "type": "object", + "properties": { + "checks": { + "type": "object", + "patternProperties": { + "^[a-zA-Z0-9_]+$": { + "type": "object", + "patternProperties": { + "^[a-zA-Z0-9_]+$": { + "type": "object", + "properties": { + "issues": { + "type": "object", + "properties": { + "code": {"type": "string"}, + "message": {"type": "string"}, + "level": {"enum": ["error", 
"warning"]} + }, + "required": ["code", "message", "level"], + "additionalProperties": false + }, + "selectors": { + "type": "array", + "items": {"type": "string"} + }, + "checks": { + "type": "array", + "items": {"type": "string"} + } + }, + "required": ["checks", "selectors"] + } + } + } + }, + "additionalProperties": false + }, + "files": { + "type": "object", + "properties": { + "common": { + "type": "object", + "properties": { + "core": { + "type": "object", + "patternProperties": { + "^[a-zA-Z0-9_]+$": { + "type": "object", + "properties": { + "level": {"type": "string"}, + "path": {"type": "string"}, + "extensions": {"type": "array"}, + "stem": {"type": "string"} + }, + "required": ["level"], + "additionalProperties": false + } + }, + "additionalProperties": false + }, + "tables": { + "type": "object", + "patternProperties": { + "^[a-zA-Z0-9_]+$": { + "type": "object", + "properties": { + "level": {"type": "string"}, + "path": {"type": "string"}, + "extensions": {"type": "array"}, + "stem": {"type": "string"}, + "entities": {"$ref": "#/definitions/entities"}, + "suffixes": {"type": "array"} + }, + "required": ["level", "extensions"], + "additionalProperties": false + } + }, + "additionalProperties": false + } + }, + "required": ["core", "tables"], + "additionalProperties": false + }, + "deriv": { + "type": "object", + "patternProperties": { + "^[a-zA-Z0-9_]+$": { + "type": "object", + "patternProperties": { + "^[a-zA-Z0-9_]+$": { + "type": "object", + "properties": { + "entities": { + "$ref": "#/definitions/entities" + } + } + }, + "additionalProperties": false + } + } + }, + "additionalProperties": false + }, + "raw": { + "type": "object", + "patternProperties": { + "^[a-z]+$": { + "type": "object", + "patternProperties": { + "^[a-zA-Z0-9_]+$": { + "type": "object", + "properties": { + "suffixes": { + "type": "array", + "items": {"type": "string"} + }, + "extensions": { + "type": "array", + "items": {"pattern": "^[./][a-z.]+|"} + }, + "datatypes": { + 
"type": "array", + "items": {"pattern": "^[a-z]+$"} + }, + "entities": {"$ref": "#/definitions/entities"} + } + } + }, + "additionalProperties": false + } + }, + "additionalProperties": false + } + }, + "required": ["common", "deriv", "raw"], + "additionalProperties": false + }, + "sidecars": { + "type": "object", + "patternProperties": { + "^derivatives$": { + "type": "object", + "properties": { + "common_derivatives": {"$ref": "#/definitions/sidecar"} + }, + "required": ["common_derivatives"], + "additionalProperties": false + }, + "^(?!derivatives$)[a-z_]+$": { + "$ref": "#/definitions/sidecar" + }, + "additionalProperties": false + }, + "additionalProperties": false + }, + "tabular_data": { + "type": "object", + "patternProperties": { + "^derivatives$": { + "type": "object", + "properties": { + "common_derivatives": { + "$ref": "#/definitions/tabular_data" + } + }, + "required": [ + "common_derivatives" + ], + "additionalProperties": false + }, + "^(?!derivatives$)[a-z_]+$": { + "$ref": "#/definitions/tabular_data" + } + }, + "additionalProperties": false + }, + "common_principles": { + "type": "array", + "items": {"type": "string"} + }, + "dataset_metadata": { + "type": "object" + }, + "directories": { + "type": "object" + }, + "entities": { + "type": "array", + "items": {"type": "string"} + }, + "errors": { + "type": "object", + "patternProperties": { + "^[a-zA-Z0-9_]+$": { + "type": "object", + "properties": { + "code": {"type": "string"}, + "message": {"type": "string"}, + "level": {"enum": ["error", "warning"]}, + "selectors": { + "type": "array", + "items": { + "type": "string" + } + } + }, + "required": ["message", "level"], + "additionalProperties": false + } + }, + "additionalProperties": false + }, + "modalities": { + "type": "object", + "patternProperties": { + "^[a-z]+$": { + "type": "object", + "properties": { + "datatypes": { + "type": "array", + "items": {"pattern": "^[a-z]+$"} + } + }, + "required": ["datatypes"], + "additionalProperties": false 
+ } + } + } + }, + "required": [ + "entities", + "files", + "sidecars", + "tabular_data", + "common_principles", + "dataset_metadata", + "directories", + "errors", + "modalities" + ], + "additionalProperties": false + }, + "bids_version": { + "type": "string" + }, + "schema_version": { + "type": "string" + }, + "README": { + "type": "string" + } + }, + "required": ["meta", "objects", "rules", "bids_version", "schema_version"], + "additionalProperties": false, + "definitions": { + "entities": { + "type": "object", + "patternProperties": { + "^[a-z]+$": { + "anyOf": [ + {"enum": ["optional", "required"]}, + { + "type": "object", + "properties": { + "level": {"enum": ["optional", "required"]}, + "enum": { + "type": "array", + "items": {"type": "string"} + } + }, + "required": ["level", "enum"] + } + ] + } + }, + "additionalProperties": false + }, + "sidecar": { + "type": "object", + "patternProperties": { + "^[a-zA-Z0-9_]+$": { + "type": "object", + "properties": { + "selectors": { + "type": "array", + "items": {"type": "string"} + }, + "fields": { + "type": "object", + "patternProperties": { + "^[a-zA-Z0-9_]+$": { + "anyOf": [ + {"enum": ["recommended", "optional", "required", "deprecated"]}, + { + "type": "object", + "properties": { + "level": {"enum": ["recommended", "optional", "required", "deprecated"]}, + "level_addendum": {"type": "string"} + }, + "required": ["level", "level_addendum"], + "additionalProperties": false + }, + {"pattern": "recommended.*"} + ] + } + }, + "additionalProperties": false + } + }, + "required": ["selectors", "fields"], + "additionalProperties": false + } + }, + "additionalProperties": false + }, + "tabular_data": { + "type": "object", + "patternProperties": { + "^[a-zA-Z0-9_]+$": { + "type": "object", + "properties": { + "selectors": { + "type": "array", + "items": {"type": "string"} + }, + "columns": { + "type": "object", + "patternProperties": { + "^[a-zA-Z0-9_]+$": { + "anyOf": [ + {"enum": ["recommended", "optional", "required", 
"deprecated"]}, + { + "type": "object", + "properties": { + "level": {"enum": ["recommended", "optional", "required", "deprecated"]}, + "level_addendum": {"type": "string"}, + "description_addendum": {"type": "string"} + }, + "required": ["level"], + "additionalProperties": false + }, + {"pattern": "recommended.*"} + ] + } + }, + "additionalProperties": false + }, + "additional_columns": { + "type": "string" + }, + "index_columns": {"type": "array", "items": {"type": "string"}}, + "initial_columns": { + "type": "array", + "items": {"type": "string"} + } + }, + "required": ["selectors", "columns"], + "additionalProperties": false + } + } + } + } +} diff --git a/src/schema/README.md b/src/schema/README.md index d801314595..e8cb197757 100644 --- a/src/schema/README.md +++ b/src/schema/README.md @@ -1023,3 +1023,11 @@ be found at . The JSON version of the schema contains `schema_version` and `bids_version` keys that identify the state of both the schema and the specification at the time it was compiled. + +## Metaschema + +The `metaschema.json` file is a meta-schema that uses the JSON Schema language to +formalize the allowable directories, files, fields and values of the BIDS schema, +ensuring consistency across the entire schema directory. Validation of the schema is +incorporated into the CI, so any changes that are inconsistent will be flagged before +inclusion. diff --git a/src/schema/rules/sidecars/mri.yaml b/src/schema/rules/sidecars/mri.yaml index b937898b3d..cc27d676be 100644 --- a/src/schema/rules/sidecars/mri.yaml +++ b/src/schema/rules/sidecars/mri.yaml @@ -25,7 +25,7 @@ MRIHardware: SoftwareVersions: level: recommended description_addendum: Corresponds to DICOM Tag 0018, 1020 `Software Versions`. 
- HardcopyDeviceSoftwareVersion: DEPRECATED + HardcopyDeviceSoftwareVersion: deprecated MagneticFieldStrength: level: recommended, but required for Arterial Spin Labeling ReceiveCoilName: recommended diff --git a/tools/schemacode/bidsschematools/data/metaschema.json b/tools/schemacode/bidsschematools/data/metaschema.json new file mode 120000 index 0000000000..ae3cfc38f9 --- /dev/null +++ b/tools/schemacode/bidsschematools/data/metaschema.json @@ -0,0 +1 @@ +../../../../src/metaschema.json \ No newline at end of file diff --git a/tools/schemacode/bidsschematools/schema.py b/tools/schemacode/bidsschematools/schema.py index cae15fece1..ed6b5da5f3 100644 --- a/tools/schemacode/bidsschematools/schema.py +++ b/tools/schemacode/bidsschematools/schema.py @@ -1,12 +1,22 @@ """Schema loading- and processing-related functions.""" +import json import logging import os import re +import sys +import tempfile from collections.abc import Iterable, Mapping from copy import deepcopy from functools import lru_cache +from jsonschema import ValidationError, validate + +if sys.version_info < (3, 9): + from importlib_resources import files +else: + from importlib.resources import files + from . 
def validate_schema(schema: Namespace):
    """Validate a schema against the BIDS metaschema.

    The metaschema is read from the ``metaschema.json`` resource of the
    ``bidsschematools.data`` package, and ``schema.to_dict()`` is validated
    against it with :func:`jsonschema.validate`.

    Parameters
    ----------
    schema : Namespace
        The loaded schema to check.

    Raises
    ------
    jsonschema.ValidationError
        If the schema does not conform to the metaschema. The complete error
        text is also written to a ``schema_error_*.txt`` temporary file whose
        path is attached to the exception as a note (Python 3.11+) or logged
        as an error (older interpreters).
    """
    metaschema_text = (
        files("bidsschematools.data").joinpath("metaschema.json").read_text(encoding="utf-8")
    )
    metaschema = json.loads(metaschema_text)

    try:
        validate(instance=schema.to_dict(), schema=metaschema)
    except ValidationError as e:
        # The error is sometimes too long to print in the terminal, so keep a
        # full copy on disk. delete=False: the file must outlive this call.
        with tempfile.NamedTemporaryFile(
            prefix="schema_error_",
            suffix=".txt",
            delete=False,
            mode="w+",
            encoding="utf-8",
        ) as file:
            file.write(str(e))

        # Tell the caller where the log is; otherwise the file is never found.
        pointer = f"See {file.name} for full error log."
        if hasattr(e, "add_note"):
            # BaseException.add_note only exists on Python >= 3.11.
            e.add_note(pointer)
        else:
            logging.getLogger(__name__).error(pointer)
        # Bare raise re-raises the active exception with its traceback intact.
        raise
that an invalid value raises an error.""" + namespace = schema.load_schema() + namespace["rules"]["files"]["deriv"]["preprocessed_data"]["anat_nonparametric_common"][ + "entities" + ]["density"] = "invalid" + with pytest.raises(ValidationError) as e: + schema.validate_schema(namespace) + print(e.value) + assert "invalid" in str(e.value) diff --git a/tools/schemacode/setup.cfg b/tools/schemacode/setup.cfg index 27159dee84..f75732b593 100644 --- a/tools/schemacode/setup.cfg +++ b/tools/schemacode/setup.cfg @@ -24,6 +24,7 @@ install_requires = click pyyaml importlib_resources; python_version < "3.9" + jsonschema packages = find: include_package_data = false zip_safe = false @@ -54,6 +55,7 @@ all = [options.package_data] bidsschematools = + data/metaschema.json data/schema/BIDS_VERSION data/schema/SCHEMA_VERSION data/schema/**/*.yaml