diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 5e1ed3b234..c59c7ce96a 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -14,6 +14,16 @@ repos: - id: check-ast - id: check-added-large-files - id: check-case-conflict + - repo: https://github.com/python-jsonschema/check-jsonschema + rev: 0.29.0 + hooks: + - id: check-dependabot + - id: check-github-workflows + args: ["--verbose"] + - id: check-metaschema + files: src/metaschema.json + - id: check-readthedocs + files: readthedocs.yml - repo: https://github.com/psf/black rev: 24.4.2 hooks: @@ -57,7 +67,7 @@ repos: - id: codespell args: ["--config=.codespellrc", "--dictionary=-", "--dictionary=.codespell_dict"] - repo: https://github.com/pre-commit/mirrors-mypy - rev: v1.10.1 + rev: v1.11.0 hooks: - id: mypy # Sync with project.optional-dependencies.typing @@ -71,6 +81,8 @@ repos: - types-PyYAML - types-tabulate - types-jsonschema + - jsonschema + - httpx args: ["tools/schemacode/bidsschematools"] pass_filenames: false - repo: https://github.com/koalaman/shellcheck-precommit diff --git a/src/metaschema.json b/src/metaschema.json index 5d01c8862f..ce500d6418 100644 --- a/src/metaschema.json +++ b/src/metaschema.json @@ -1,5 +1,5 @@ { - "$schema": "http://json-schema.org/draft-07/schema#", + "$schema": "https://json-schema.org/draft/2020-12/schema#", "type": "object", "properties": { "meta": { @@ -12,10 +12,7 @@ "type": "object", "properties": { "selectors": { - "type": "array", - "items": { - "type": "string" - } + "$ref": "#/definitions/ruleTypes/expressionList" }, "target": { "type": "object", @@ -51,7 +48,8 @@ "additionalProperties": false }, "context": { - "type": "object" + "description": "The context object is itself JSON schema", + "$ref": "https://json-schema.org/draft/2020-12/schema#" }, "expression_tests": { "type": "array", @@ -83,44 +81,19 @@ "type": "object", "patternProperties": { "^[a-zA-Z0-9_]+$": { - "type": "object", - "properties": { - "name": { - "type": 
"string" - }, - "display_name": { - "type": "string" - }, - "description": { - "type": "string" - }, - "type": { - "$ref": "http://json-schema.org/draft-07/schema#/properties/type" - }, - "format": { - "type": "string" - }, - "pattern": { - "type": "string" - }, - "unit": { - "type": "string" - }, - "enum": { - "type": "array" - }, - "anyOf": { - "$ref": "http://json-schema.org/draft-07/schema#/properties/anyOf" - }, - "maximum": { - "type": "number" - }, - "minimum": { - "type": "number" + "allOf": [ + { "$ref": "#/definitions/termTypes/JSONSchema" }, + { "$ref": "#/definitions/termTypes/general" }, + { "$ref": "#/definitions/termTypes/nameValue" }, + { + "type": "object", + "properties": { + "format": { "$ref": "#/definitions/enums/formats" }, + "unit": { "type": "string" } + } } - }, - "required": ["name", "display_name"], - "additionalProperties": false + ], + "unevaluatedProperties": false } }, "additionalProperties": false @@ -129,17 +102,8 @@ "type": "object", "patternProperties": { "^[a-zA-Z0-9_]+$": { - "type": "object", - "properties": { - "display_name": { - "type": "string" - }, - "description": { - "type": "string" - } - }, - "required": ["display_name", "description"], - "additionalProperties": false + "$ref": "#/definitions/termTypes/general", + "unevaluatedProperties": false }, "additionalProperties": false } @@ -148,20 +112,11 @@ "type": "object", "patternProperties": { "^[a-zA-Z0-9_]+$": { - "type": "object", - "properties": { - "value": { - "type": "string" - }, - "display_name": { - "type": "string" - }, - "description": { - "type": "string" - } - }, - "required": ["value", "display_name", "description"], - "additionalProperties": false + "allOf": [ + { "$ref": "#/definitions/termTypes/general" }, + { "$ref": "#/definitions/termTypes/value" } + ], + "unevaluatedProperties": false } }, "additionalProperties": false @@ -170,35 +125,19 @@ "type": "object", "patternProperties": { "^[a-zA-Z0-9_]+$": { - "type": "object", - "properties": { - "name": { 
- "type": "string" - }, - "display_name": { - "type": "string" - }, - "description": { - "type": "string" - }, - "type": { - "$ref": "http://json-schema.org/draft-07/schema#/properties/type" - }, - "format": { - "type": "string" - }, - "enum": { - "type": "array" + "allOf": [ + { "$ref": "#/definitions/termTypes/JSONSchema" }, + { "$ref": "#/definitions/termTypes/general" }, + { "$ref": "#/definitions/termTypes/nameValue" }, + { + "type": "object", + "properties": { + "format": { "$ref": "#/definitions/enums/formats" } + }, + "required": ["format"] } - }, - "required": [ - "name", - "display_name", - "description", - "type", - "format" ], - "additionalProperties": false + "unevaluatedProperties": false } }, "additionalProperties": false @@ -206,30 +145,18 @@ "enums": { "type": "object", "patternProperties": { - "^[a-zA-Z0-9_-]+$": { - "type": "object", - "properties": { - "type": { - "$ref": "http://json-schema.org/draft-07/schema#/properties/type" - }, - "enum": { - "type": "array" - }, - "value": { - "type": "string" - }, - "display_name": { - "type": "string" - }, - "description": { - "type": "string" - }, - "tags": { - "type": "array" - } - }, - "additionalProperties": false - } + "^[a-zA-Z0-9][a-zA-Z0-9_-]*$": { + "allOf": [ + { "$ref": "#/definitions/termTypes/general" }, + { "$ref": "#/definitions/termTypes/value" }, + { "properties": { "tags": { "type": "array" } } } + ] + }, + "^_[a-zA-Z0-9_-]+$": { + "$ref": "https://json-schema.org/draft/2020-12/schema#", + "required": ["type", "enum"] + }, + "unevaluatedProperties": false }, "additionalProperties": false }, @@ -237,20 +164,11 @@ "type": "object", "patternProperties": { "^[a-zA-Z0-9_]+$": { - "type": "object", - "properties": { - "value": { - "type": "string" - }, - "display_name": { - "type": "string" - }, - "description": { - "type": "string" - } - }, - "required": ["display_name", "description", "value"], - "additionalProperties": false + "allOf": [ + { "$ref": "#/definitions/termTypes/general" }, + { 
"$ref": "#/definitions/termTypes/value" } + ], + "unevaluatedProperties": false } }, "additionalProperties": false @@ -259,66 +177,56 @@ "type": "object", "patternProperties": { "^[a-zA-Z0-9_]+$": { - "type": "object", - "properties": { - "display_name": { - "type": "string" - }, - "file_type": { - "type": "string" - }, - "description": { - "type": "string" + "allOf": [ + { "$ref": "#/definitions/termTypes/general" }, + { + "properties": { "file_type": { "type": "string" } }, + "required": ["file_type"] } - }, - "required": ["display_name", "file_type", "description"], - "additionalProperties": false + ], + "unevaluatedProperties": false } }, "additionalProperties": false }, "formats": { "type": "object", - "patternProperties": { - "^[a-zA-Z0-9_]+$": { - "type": "object", - "properties": { - "display_name": { - "type": "string" - }, - "description": { - "type": "string" + "propertyNames": { "$ref": "#/definitions/enums/formats" }, + "additionalProperties": { + "allOf": [ + { "$ref": "#/definitions/termTypes/general" }, + { + "type": "object", + "properties": { + "pattern": { + "$ref": "https://json-schema.org/draft/2020-12/meta/validation#/properties/pattern" + } }, - "pattern": { - "format": "regex" - } - }, - "required": ["display_name", "description", "pattern"], - "additionalProperties": false - } - }, - "additionalProperties": false + "required": ["pattern"] + } + ], + "unevaluatedProperties": false + } }, "metadata": { "type": "object", "patternProperties": { "^[a-zA-Z0-9_]+$": { - "type": "object", - "properties": { - "name": { - "type": "string" - }, - "display_name": { - "type": "string" - }, - "description": { - "type": "string" - }, - "type": { - "$ref": "http://json-schema.org/draft-07/schema#/properties/type" + "allOf": [ + { "$ref": "#/definitions/termTypes/JSONSchema" }, + { "$ref": "#/definitions/termTypes/general" }, + { "$ref": "#/definitions/termTypes/nameValue" }, + { + "type": "object", + "properties": { + "recommended": { + "$ref": 
"https://json-schema.org/draft/2020-12/meta/validation#/properties/required" + }, + "unit": { "type": "string" } + } } - }, - "required": ["name", "display_name", "description"] + ], + "unevaluatedProperties": false } }, "additionalProperties": false @@ -327,17 +235,8 @@ "type": "object", "patternProperties": { "^[a-z]+$": { - "type": "object", - "properties": { - "display_name": { - "type": "string" - }, - "description": { - "type": "string" - } - }, - "required": ["display_name", "description"], - "additionalProperties": false + "$ref": "#/definitions/termTypes/general", + "unevaluatedProperties": false } }, "additionalProperties": false @@ -346,19 +245,22 @@ "type": "object", "patternProperties": { "^[a-zA-Z0-9_]+$": { - "type": "object", - "properties": { - "value": { - "type": "string" - }, - "display_name": { - "type": "string" - }, - "description": { - "type": "string" + "allOf": [ + { "$ref": "#/definitions/termTypes/general" }, + { "$ref": "#/definitions/termTypes/value" }, + { + "type": "object", + "properties": { + "unit": { "type": "string" }, + "anyOf": { + "$ref": "https://json-schema.org/draft/2020-12/meta/applicator#/properties/anyOf" + }, + "maxValue": { "type": "number" }, + "minValue": { "type": "number" } + } } - }, - "required": ["value", "display_name", "description"] + ], + "unevaluatedProperties": false }, "additionalProperties": false } @@ -391,26 +293,19 @@ "^[a-zA-Z0-9_]+$": { "type": "object", "properties": { - "issues": { - "type": "object", - "properties": { - "code": { "type": "string" }, - "message": { "type": "string" }, - "level": { "enum": ["error", "warning"] } - }, - "required": ["code", "message", "level"], - "additionalProperties": false + "issue": { + "allOf": [{ "$ref": "#/definitions/ruleTypes/issue" }], + "required": ["level"] }, "selectors": { - "type": "array", - "items": { "type": "string" } + "$ref": "#/definitions/ruleTypes/expressionList" }, "checks": { - "type": "array", - "items": { "type": "string" } + "$ref": 
"#/definitions/ruleTypes/expressionList" } }, - "required": ["checks", "selectors"] + "required": ["checks", "selectors"], + "additionalProperties": false } } } @@ -454,18 +349,10 @@ "deriv": { "type": "object", "patternProperties": { - "^[a-zA-Z0-9_]+$": { + "^[a-z_]+$": { "type": "object", "patternProperties": { - "^[a-zA-Z0-9_]+$": { - "type": "object", - "properties": { - "entities": { - "$ref": "#/definitions/entities" - } - } - }, - "additionalProperties": false + "^[a-zA-Z0-9_]+$": { "$ref": "#/definitions/suffixRule" } } } }, @@ -549,10 +436,7 @@ "message": { "type": "string" }, "level": { "enum": ["error", "warning"] }, "selectors": { - "type": "array", - "items": { - "type": "string" - } + "$ref": "#/definitions/ruleTypes/expressionList" } }, "required": ["message", "level"], @@ -596,35 +480,100 @@ }, "schema_version": { "type": "string" - }, - "README": { - "type": "string" } }, "required": ["meta", "objects", "rules", "bids_version", "schema_version"], "additionalProperties": false, "definitions": { - "entities": { - "type": "object", - "patternProperties": { - "^[a-z]+$": { - "anyOf": [ - { "enum": ["optional", "required"] }, - { - "type": "object", - "properties": { - "level": { "enum": ["optional", "required"] }, - "enum": { - "type": "array", - "items": { "type": "string" } - } - }, - "required": ["level", "enum"] - } - ] - } + "enums": { + "formats": { + "$comment": "Formats whose patterns are defined in BIDS schema", + "type": "string", + "enum": [ + "index", + "label", + "boolean", + "integer", + "number", + "string", + "hed_version", + "bids_uri", + "dataset_relative", + "date", + "datetime", + "file_relative", + "participant_relative", + "rrid", + "stimuli_relative", + "time", + "unit", + "uri" + ] }, - "additionalProperties": false + "requirement_level": { + "$comment": "Requirement levels that may apply to terms", + "type": "string", + "enum": ["required", "recommended", "optional", "deprecated"] + } + }, + "termTypes": { + "general": { + 
"type": "object", + "properties": { + "display_name": { "type": "string" }, + "description": { "type": "string" } + }, + "required": ["display_name", "description"] + }, + "JSONSchema": { + "$ref": "https://json-schema.org/draft/2020-12/schema#", + "anyOf": [{ "required": ["type"] }, { "required": ["anyOf"] }] + }, + "nameValue": { + "type": "object", + "properties": { "name": { "type": "string" } }, + "required": ["name"] + }, + "value": { + "type": "object", + "properties": { "value": { "type": "string" } }, + "required": ["value"] + } + }, + "ruleTypes": { + "field": { + "anyOf": [ + { "$ref": "#/definitions/enums/requirement_level" }, + { + "type": "object", + "properties": { + "level": { "$ref": "#/definitions/enums/requirement_level" }, + "level_addendum": { "type": "string" }, + "description_addendum": { "type": "string" }, + "issue": { "$ref": "#/definitions/ruleTypes/issue" } + }, + "required": ["level"], + "additionalProperties": false + }, + { "type": "string", "pattern": "recommended.*" } + ] + }, + "issue": { + "type": "object", + "properties": { + "code": { "type": "string" }, + "message": { "type": "string" }, + "level": { "enum": ["error", "warning"] } + }, + "required": ["code", "message"], + "additionalProperties": false + }, + "expressionList": { + "type": "array", + "items": { + "type": "string" + } + } }, "sidecar": { "type": "object", @@ -632,41 +581,12 @@ "^[a-zA-Z0-9_]+$": { "type": "object", "properties": { - "selectors": { - "type": "array", - "items": { "type": "string" } - }, + "selectors": { "$ref": "#/definitions/ruleTypes/expressionList" }, "fields": { "type": "object", "patternProperties": { "^[a-zA-Z0-9_]+$": { - "anyOf": [ - { - "enum": [ - "recommended", - "optional", - "required", - "deprecated" - ] - }, - { - "type": "object", - "properties": { - "level": { - "enum": [ - "recommended", - "optional", - "required", - "deprecated" - ] - }, - "level_addendum": { "type": "string" } - }, - "required": ["level", "level_addendum"], - 
"additionalProperties": false - }, - { "pattern": "recommended.*" } - ] + "$ref": "#/definitions/ruleTypes/field" } }, "additionalProperties": false @@ -684,42 +604,12 @@ "^[a-zA-Z0-9_]+$": { "type": "object", "properties": { - "selectors": { - "type": "array", - "items": { "type": "string" } - }, + "selectors": { "$ref": "#/definitions/ruleTypes/expressionList" }, "columns": { "type": "object", "patternProperties": { "^[a-zA-Z0-9_]+$": { - "anyOf": [ - { - "enum": [ - "recommended", - "optional", - "required", - "deprecated" - ] - }, - { - "type": "object", - "properties": { - "level": { - "enum": [ - "recommended", - "optional", - "required", - "deprecated" - ] - }, - "level_addendum": { "type": "string" }, - "description_addendum": { "type": "string" } - }, - "required": ["level"], - "additionalProperties": false - }, - { "pattern": "recommended.*" } - ] + "$ref": "#/definitions/ruleTypes/field" } }, "additionalProperties": false @@ -774,7 +664,28 @@ "items": { "pattern": "^[a-zA-Z0-9]+$" } }, "extensions": { "type": "array", "items": { "type": "string" } }, - "entities": { "$ref": "#/definitions/entities" } + "entities": { + "type": "object", + "patternProperties": { + "^[a-z]+$": { + "anyOf": [ + { "enum": ["optional", "required"] }, + { + "type": "object", + "properties": { + "level": { "enum": ["optional", "required"] }, + "enum": { + "type": "array", + "items": { "type": "string" } + } + }, + "required": ["level", "enum"] + } + ] + } + }, + "additionalProperties": false + } }, "required": ["suffixes", "extensions", "entities"], "additionalProperties": false diff --git a/src/schema/README.md b/src/schema/README.md index 5f1abb4595..68be4f2a9b 100644 --- a/src/schema/README.md +++ b/src/schema/README.md @@ -215,7 +215,7 @@ We see expressions may contain: - Comparison operators such as `==` (equality) or `in` (subfield exists in field) - Functions such as `intersects()` -In fact, the full list of fields is defined in the `meta.context.context` object, +In 
fact, the full list of fields is defined in the `meta.context` object, which (currently) contains at the top level: - `schema`: access to the schema itself @@ -270,7 +270,7 @@ The following functions should be defined by an interpreter: | `match(arg: str, pattern: str) -> bool` | `true` if `arg` matches the regular expression `pattern` (anywhere in string) | `match(extension, ".gz$")` | True if the file extension ends with `.gz` | | `max(arg: array) -> number` | The largest non-`n/a` value in an array | `max(columns.onset)` | The time of the last onset in an events.tsv file | | `min(arg: array) -> number` | The smallest non-`n/a` value in an array | `min(sidecar.SliceTiming) == 0` | A check that the onset of the first slice is 0s | -| `sorted(arg: array) -> array` | The sorted values of the input array | `sorted(sidecar.VolumeTiming) == sidecar.VolumeTiming` | True if `sidecar.VolumeTiming` is sorted | +| `sorted(arg: array, method: str) -> array` | The sorted values of the input array; defaults to type-determined sort. If the optional `method` argument is "lexical" or "numeric", use lexical or numeric sort, respectively. 
| `sorted(sidecar.VolumeTiming) == sidecar.VolumeTiming` | True if `sidecar.VolumeTiming` is sorted | | `substr(arg: str, start: int, end: int) -> str` | The portion of the input string spanning from start position to end position | `substr(path, 0, length(path) - 3)` | `path` with the last three characters dropped | | `type(arg: Any) -> str` | The name of the type, including `"array"`, `"object"`, `"null"` | `type(datatypes)` | Returns `"array"` | diff --git a/src/schema/SCHEMA_VERSION b/src/schema/SCHEMA_VERSION index dc9bff91aa..539f9fc668 100644 --- a/src/schema/SCHEMA_VERSION +++ b/src/schema/SCHEMA_VERSION @@ -1 +1 @@ -0.9.1-dev +0.10.1-dev diff --git a/src/schema/meta/associations.yaml b/src/schema/meta/associations.yaml index b55f746bf3..503fa728c1 100644 --- a/src/schema/meta/associations.yaml +++ b/src/schema/meta/associations.yaml @@ -1,4 +1,4 @@ -# These rules indicate whether an association (defined in meta.context.context.associations) +# These rules indicate whether an association (defined in meta.context.associations) # applies to a given file. These are hints to allow implementations to avoid unnecessary # directory and file reads. # diff --git a/src/schema/meta/context.yaml b/src/schema/meta/context.yaml index 7f9ca1259c..280ae7b933 100644 --- a/src/schema/meta/context.yaml +++ b/src/schema/meta/context.yaml @@ -17,353 +17,352 @@ # principle. 
# --- -context: - type: object - properties: - schema: - description: 'The BIDS specification schema' - type: object - dataset: - description: 'Properties and contents of the entire dataset' - type: object - properties: - dataset_description: - description: 'Contents of /dataset_description.json' - type: object - files: - description: 'List of all files in dataset' - type: array - tree: - description: 'Tree view of all files in dataset' - type: object - ignored: - description: 'Set of ignored files' - type: array - datatypes: - description: 'Data types present in the dataset' - type: array - modalities: - description: 'Modalities present in the dataset' - type: array - subjects: - description: 'Collections of subjects in dataset' - type: object - properties: - sub_dirs: - description: 'Subjects as determined by sub-*/ directories' - type: array - items: - type: string - participant_id: - description: 'The participant_id column of participants.tsv' - type: array - items: - type: string - phenotype: - description: 'The union of participant_id columns in phenotype files' - type: array - items: - type: string - subject: - description: 'Properties and contents of the current subject' - type: object - properties: - sessions: - description: 'Collections of sessions in subject' - type: object - properties: - ses_dirs: - description: 'Sessions as determined by ses-*/ directories' - type: array - items: - type: string - session_id: - description: 'The session_id column of sessions.tsv' - type: array - items: - type: string - phenotype: - description: 'The union of session_id columns in phenotype files' - type: array - items: - type: string - - # Properties of the current file - path: - description: 'Path of the current file' - type: string - size: - description: 'Length of the current file in bytes' - type: int - entities: - description: 'Entities parsed from the current filename' - type: object - datatype: - description: 'Datatype of current file, for examples, anat' - 
type: string - suffix: - description: 'Suffix of current file' - type: string - extension: - description: 'Extension of current file including initial dot' - type: string - modality: - description: 'Modality of current file, for examples, MRI' - type: string - - sidecar: - description: 'Sidecar metadata constructed via the inheritance principle' - type: object - associations: - # Note that this is not intended to be an exhaustive list of associated files - # or to expose every attribute of those files. It is specifically those files - # and attributes for which a rule needs to be applied from an originating file. - description: | - Associated files, indexed by suffix, selected according to the inheritance principle - type: object - properties: - events: - description: 'Events file' - type: object - properties: - path: - description: 'Path to associated events file' - type: string - onset: - description: 'Contents of the onset column' - type: array - items: - type: string - aslcontext: - description: 'ASL context file' - type: object - properties: - path: - description: 'Path to associated aslcontext file' +type: object +properties: + schema: + description: 'The BIDS specification schema' + type: object + dataset: + description: 'Properties and contents of the entire dataset' + type: object + properties: + dataset_description: + description: 'Contents of /dataset_description.json' + type: object + files: + description: 'List of all files in dataset' + type: array + tree: + description: 'Tree view of all files in dataset' + type: object + ignored: + description: 'Set of ignored files' + type: array + datatypes: + description: 'Data types present in the dataset' + type: array + modalities: + description: 'Modalities present in the dataset' + type: array + subjects: + description: 'Collections of subjects in dataset' + type: object + properties: + sub_dirs: + description: 'Subjects as determined by sub-*/ directories' + type: array + items: type: string - n_rows: - 
description: 'Number of rows in aslcontext.tsv' - type: integer - volume_type: - description: 'Contents of the volume_type column' - type: array - items: - type: string - m0scan: - description: 'M0 scan file' - type: object - properties: - path: - description: 'Path to associated M0 scan file' + participant_id: + description: 'The participant_id column of participants.tsv' + type: array + items: type: string - magnitude: - description: 'Magnitude image file' - type: object - properties: - path: - description: 'Path to associated magnitude file' + phenotype: + description: 'The union of participant_id columns in phenotype files' + type: array + items: type: string - magnitude1: - description: 'Magnitude1 image file' - type: object - properties: - path: - description: 'Path to associated magnitude1 file' + subject: + description: 'Properties and contents of the current subject' + type: object + properties: + sessions: + description: 'Collections of sessions in subject' + type: object + properties: + ses_dirs: + description: 'Sessions as determined by ses-*/ directories' + type: array + items: type: string - bval: - description: 'B value file' - type: object - properties: - path: - description: 'Path to associated bval file' + session_id: + description: 'The session_id column of sessions.tsv' + type: array + items: type: string - n_cols: - description: 'Number of columns in bval file' - type: integer - n_rows: - description: 'Number of rows in bval file' - type: integer - values: - description: 'B-values contained in bval file' - type: array - items: - type: number - bvec: - description: 'B vector file' - type: object - properties: - path: - description: 'Path to associated bvec file' + phenotype: + description: 'The union of session_id columns in phenotype files' + type: array + items: type: string - n_cols: - description: 'Number of columns in bvec file' - type: integer - n_rows: - description: 'Number of rows in bvec file' - type: integer - channels: - description: 
'Channels file' - type: object - properties: - path: - description: 'Path to associated channels file' + + # Properties of the current file + path: + description: 'Path of the current file' + type: string + size: + description: 'Length of the current file in bytes' + type: integer + entities: + description: 'Entities parsed from the current filename' + type: object + datatype: + description: 'Datatype of current file, for example, anat' + type: string + suffix: + description: 'Suffix of current file' + type: string + extension: + description: 'Extension of current file including initial dot' + type: string + modality: + description: 'Modality of current file, for example, MRI' + type: string + + sidecar: + description: 'Sidecar metadata constructed via the inheritance principle' + type: object + associations: + # Note that this is not intended to be an exhaustive list of associated files + # or to expose every attribute of those files. It is specifically those files + # and attributes for which a rule needs to be applied from an originating file. 
+ description: | + Associated files, indexed by suffix, selected according to the inheritance principle + type: object + properties: + events: + description: 'Events file' + type: object + properties: + path: + description: 'Path to associated events file' + type: string + onset: + description: 'Contents of the onset column' + type: array + items: type: string - type: - description: 'Contents of the type column' - type: array - items: - type: string - coordsystem: - description: 'Coordinate system file' - type: object - properties: - path: - description: 'Path to associated coordsystem file' + aslcontext: + description: 'ASL context file' + type: object + properties: + path: + description: 'Path to associated aslcontext file' + type: string + n_rows: + description: 'Number of rows in aslcontext.tsv' + type: integer + volume_type: + description: 'Contents of the volume_type column' + type: array + items: type: string - # The following properties are populated if the current file is of an appropriate type - columns: - description: 'TSV columns, indexed by column header, values are arrays with column contents' - type: object - additionalProperties: - type: array - json: - description: 'Contents of the current JSON file' - type: object - gzip: - description: 'Parsed contents of gzip header' - type: object - properties: - timestamp: - description: 'Modification time, unix timestamp' - type: number - filename: - description: 'Filename' - type: string - comment: - description: 'Comment' - type: string - nifti_header: - name: 'NIfTI Header' - description: 'Parsed contents of NIfTI header referenced elsewhere in schema.' - type: object - properties: - dim_info: - name: 'Dimension Information' - description: 'Metadata about dimensions data.' - type: object - properties: - freq: - name: 'Frequency' - description: 'These fields encode which spatial dimension (1, 2, or 3).' 
- type: integer - phase: - name: 'Phase' - description: 'Corresponds to which acquisition dimension for MRI data.' - type: integer - slice: - name: 'Slice' - description: 'Slice dimensions.' - type: integer - dim: - name: 'Data Dimensions' - description: 'Data seq dimensions.' - type: array - minItems: 8 - maxItems: 8 - items: + m0scan: + description: 'M0 scan file' + type: object + properties: + path: + description: 'Path to associated M0 scan file' + type: string + magnitude: + description: 'Magnitude image file' + type: object + properties: + path: + description: 'Path to associated magnitude file' + type: string + magnitude1: + description: 'Magnitude1 image file' + type: object + properties: + path: + description: 'Path to associated magnitude1 file' + type: string + bval: + description: 'B value file' + type: object + properties: + path: + description: 'Path to associated bval file' + type: string + n_cols: + description: 'Number of columns in bval file' type: integer - pixdim: - name: 'Pixel Dimension' - description: 'Grid spacings (unit per dimension).' - type: array - minItems: 8 - maxItems: 8 - items: - type: number - shape: - name: 'Data shape' - description: 'Data array shape, equal to dim[1:dim[0] + 1]' - type: array - minItems: 0 - maxItems: 7 - items: + n_rows: + description: 'Number of rows in bval file' type: integer - voxel_sizes: - name: 'Voxel sizes' - description: 'Voxel sizes, equal to pixdim[1:dim[0] + 1]' - type: array - minItems: 0 - maxItems: 7 - items: - type: number - xyzt_units: - name: 'XYZT Units' - description: 'Units of pixdim[1..4]' - type: object - properties: - xyz: - name: 'XYZ Units' - description: 'String representing the unit of voxel spacing.' - type: string - enum: - - $ref: objects.enums.unknown.value - # TODO: Add definitions for these values. (perhaps don't specify) - - 'meter' - - 'mm' - - 'um' - t: - name: 'Time Unit' - description: 'String representing the unit of inter-volume intervals.' 
+ values: + description: 'B-values contained in bval file' + type: array + items: + type: number + bvec: + description: 'B vector file' + type: object + properties: + path: + description: 'Path to associated bvec file' + type: string + n_cols: + description: 'Number of columns in bvec file' + type: integer + n_rows: + description: 'Number of rows in bvec file' + type: integer + channels: + description: 'Channels file' + type: object + properties: + path: + description: 'Path to associated channels file' + type: string + type: + description: 'Contents of the type column' + type: array + items: type: string - enum: - - $ref: objects.enums.unknown.value - # TODO: Add definitions for these values. (perhaps don't specify) - - 'sec' - - 'msec' - - 'usec' - qform_code: - name: 'qform code' - description: 'Use of the quaternion fields.' + coordsystem: + description: 'Coordinate system file' + type: object + properties: + path: + description: 'Path to associated coordsystem file' + type: string + # The following properties are populated if the current file is of an appropriate type + columns: + description: 'TSV columns, indexed by column header, values are arrays with column contents' + type: object + additionalProperties: + type: array + json: + description: 'Contents of the current JSON file' + type: object + gzip: + description: 'Parsed contents of gzip header' + type: object + properties: + timestamp: + description: 'Modification time, unix timestamp' + type: number + filename: + description: 'Filename' + type: string + comment: + description: 'Comment' + type: string + nifti_header: + name: 'NIfTI Header' + description: 'Parsed contents of NIfTI header referenced elsewhere in schema.' + type: object + properties: + dim_info: + name: 'Dimension Information' + description: 'Metadata about dimensions data.' + type: object + properties: + freq: + name: 'Frequency' + description: 'These fields encode which spatial dimension (1, 2, or 3).' 
+ type: integer + phase: + name: 'Phase' + description: 'Corresponds to which acquisition dimension for MRI data.' + type: integer + slice: + name: 'Slice' + description: 'Slice dimensions.' + type: integer + dim: + name: 'Data Dimensions' + description: 'Data seq dimensions.' + type: array + minItems: 8 + maxItems: 8 + items: type: integer - sform_code: - name: 'sform code' - description: 'Use of the affine fields.' + pixdim: + name: 'Pixel Dimension' + description: 'Grid spacings (unit per dimension).' + type: array + minItems: 8 + maxItems: 8 + items: + type: number + shape: + name: 'Data shape' + description: 'Data array shape, equal to dim[1:dim[0] + 1]' + type: array + minItems: 0 + maxItems: 7 + items: type: integer - ome: - name: 'Open Microscopy Environment fields' - description: 'Parsed contents of OME-XML header, which may be found in OME-TIFF or OME-ZARR files' - type: object - properties: - PhysicalSizeX: - name: 'PhysicalSizeX' - description: 'Pixels / @PhysicalSizeX' - type: float - PhysicalSizeY: - name: 'PhysicalSizeY' - description: 'Pixels / @PhysicalSizeY' - type: float - PhysicalSizeZ: - name: 'PhysicalSizeZ' - description: 'Pixels / @PhysicalSizeZ' - type: float - PhysicalSizeXUnit: - name: 'PhysicalSizeXUnit' - description: 'Pixels / @PhysicalSizeXUnit' - type: string - PhysicalSizeYUnit: - name: 'PhysicalSizeYUnit' - description: 'Pixels / @PhysicalSizeYUnit' - type: string - PhysicalSizeZUnit: - name: 'PhysicalSizeZUnit' - description: 'Pixels / @PhysicalSizeZUnit' - type: string - tiff: - name: 'TIFF' - description: 'TIFF file format metadata' - type: object - properties: - version: - name: 'Version' - description: 'TIFF file format version (the second 2-byte block)' - type: int + voxel_sizes: + name: 'Voxel sizes' + description: 'Voxel sizes, equal to pixdim[1:dim[0] + 1]' + type: array + minItems: 0 + maxItems: 7 + items: + type: number + xyzt_units: + name: 'XYZT Units' + description: 'Units of pixdim[1..4]' + type: object + properties: 
+ xyz: + name: 'XYZ Units' + description: 'String representing the unit of voxel spacing.' + type: string + enum: + - $ref: objects.enums.unknown.value + # TODO: Add definitions for these values. (perhaps don't specify) + - 'meter' + - 'mm' + - 'um' + t: + name: 'Time Unit' + description: 'String representing the unit of inter-volume intervals.' + type: string + enum: + - $ref: objects.enums.unknown.value + # TODO: Add definitions for these values. (perhaps don't specify) + - 'sec' + - 'msec' + - 'usec' + qform_code: + name: 'qform code' + description: 'Use of the quaternion fields.' + type: integer + sform_code: + name: 'sform code' + description: 'Use of the affine fields.' + type: integer + ome: + name: 'Open Microscopy Environment fields' + description: 'Parsed contents of OME-XML header, which may be found in OME-TIFF or OME-ZARR files' + type: object + properties: + PhysicalSizeX: + name: 'PhysicalSizeX' + description: 'Pixels / @PhysicalSizeX' + type: number + PhysicalSizeY: + name: 'PhysicalSizeY' + description: 'Pixels / @PhysicalSizeY' + type: number + PhysicalSizeZ: + name: 'PhysicalSizeZ' + description: 'Pixels / @PhysicalSizeZ' + type: number + PhysicalSizeXUnit: + name: 'PhysicalSizeXUnit' + description: 'Pixels / @PhysicalSizeXUnit' + type: string + PhysicalSizeYUnit: + name: 'PhysicalSizeYUnit' + description: 'Pixels / @PhysicalSizeYUnit' + type: string + PhysicalSizeZUnit: + name: 'PhysicalSizeZUnit' + description: 'Pixels / @PhysicalSizeZUnit' + type: string + tiff: + name: 'TIFF' + description: 'TIFF file format metadata' + type: object + properties: + version: + name: 'Version' + description: 'TIFF file format version (the second 2-byte block)' + type: integer diff --git a/src/schema/meta/expression_tests.yaml b/src/schema/meta/expression_tests.yaml index 8ab60a28d4..0d5d54a1f2 100644 --- a/src/schema/meta/expression_tests.yaml +++ b/src/schema/meta/expression_tests.yaml @@ -110,6 +110,16 @@ result: null - expression: sorted([3, 2, 1]) result: 
[1, 2, 3] +- expression: sorted([1, 2, 5, 10], "lexical") + result: [1, 10, 2, 5] +- expression: sorted(["1", "2", "5", "10"]) + result: ["1", "10", "2", "5"] +- expression: sorted(["1", "2", "5", "10"], "numeric") + result: ["1", "2", "5", "10"] +- expression: sorted(["1", "2", "n/a"], "numeric") + result: ["1", "2", "n/a"] +- expression: sorted(["n/a", "2", "1"], "numeric") + result: ["n/a", "1", "2"] - expression: allequal(sorted([3, 2, 1]), [1, 2, 3]) result: true # Regression test. Javascript will sort lexically by default. diff --git a/src/schema/objects/metadata.yaml b/src/schema/objects/metadata.yaml index 70926904fc..5614a37c3f 100644 --- a/src/schema/objects/metadata.yaml +++ b/src/schema/objects/metadata.yaml @@ -1104,8 +1104,8 @@ GeneratedBy: minItems: 1 items: type: object - required_fields: [Name] - recommended_fields: [Version] + required: [Name] + recommended: [Version] properties: Name: name: Name @@ -1136,7 +1136,7 @@ GeneratedBy: used to produce the dataset. Valid keys in this object include `Type`, `Tag` and [`URI`][uri] with [string][] values. type: object - recommended_fields: + recommended: - Type - Tag - URI @@ -1170,7 +1170,7 @@ Genetics: description: | An object containing information about the genetics descriptor. type: object - required_fields: [Dataset] + required: [Dataset] properties: Dataset: name: Dataset @@ -3353,7 +3353,7 @@ StimulusPresentation: Object containing key-value pairs related to the software used to present the stimuli during the experiment. 
type: object - recommended_fields: + recommended: - OperatingSystem - ScreenDistance - ScreenRefreshRate diff --git a/src/schema/rules/checks/eeg.yaml b/src/schema/rules/checks/eeg.yaml new file mode 100644 index 0000000000..b0b0dfd2bc --- /dev/null +++ b/src/schema/rules/checks/eeg.yaml @@ -0,0 +1,79 @@ +--- +EEGChannelCountReq: + issue: + code: EEG_CHANNEL_COUNT_MISMATCH + message: | + The EEGChannelCount metadata does not match the number of channels + with type EEG in the associated channels.tsv file. + level: warning + selectors: + - suffix == "eeg" + - type(sidecar.EEGChannelCount) != "null" + - associations.channels.type + checks: + - sidecar.EEGChannelCount == count(associations.channels.type, "EEG") +ECGChannelCountReq: + issue: + code: ECG_CHANNEL_COUNT_MISMATCH + message: | + The ECGChannelCount metadata does not match the number of channels + with type ECG in the associated channels.tsv file. + level: warning + selectors: + - suffix == "eeg" + - type(sidecar.ECGChannelCount) != "null" + - associations.channels.type + checks: + - sidecar.ECGChannelCount == count(associations.channels.type, "ECG") +EMGChannelCountReq: + issue: + code: EMG_CHANNEL_COUNT_MISMATCH + message: | + The EMGChannelCount metadata does not match the number of channels + with type EMG in the associated channels.tsv file. + level: warning + selectors: + - suffix == "eeg" + - type(sidecar.EMGChannelCount) != "null" + - associations.channels.type + checks: + - sidecar.EMGChannelCount == count(associations.channels.type, "EMG") +EOGChannelCountReq: + issue: + code: EOG_CHANNEL_COUNT_MISMATCH + message: | + The EOGChannelCount metadata does not match the number of channels + with type EOG in the associated channels.tsv file. 
+ level: warning + selectors: + - suffix == "eeg" + - type(sidecar.EOGChannelCount) != "null" + - associations.channels.type + checks: + - sidecar.EOGChannelCount == count(associations.channels.type, "EOG") +MiscChannelCountReq: + issue: + code: MISC_CHANNEL_COUNT_MISMATCH + message: | + The MiscChannelCount metadata does not match the number of channels + with type MISC in the associated channels.tsv file. + level: warning + selectors: + - suffix == "eeg" + - type(sidecar.MiscChannelCount) != "null" + - associations.channels.type + checks: + - sidecar.MiscChannelCount == count(associations.channels.type, "MISC") +TriggerChannelCountReq: + issue: + code: TRIGGER_CHANNEL_COUNT_MISMATCH + message: | + The TriggerChannelCount metadata does not match the number of channels + with type TRIG in the associated channels.tsv file. + level: warning + selectors: + - suffix == "eeg" + - type(sidecar.TriggerChannelCount) != "null" + - associations.channels.type + checks: + - sidecar.TriggerChannelCount == count(associations.channels.type, "TRIG") diff --git a/src/schema/rules/checks/events.yaml b/src/schema/rules/checks/events.yaml index 234b9d078d..1f0f57a123 100644 --- a/src/schema/rules/checks/events.yaml +++ b/src/schema/rules/checks/events.yaml @@ -39,4 +39,4 @@ SortedOnsets: - extension == ".tsv" checks: # n/a values will likely cause false alarms if encountered. Consider alternatives. 
- - allequal(sorted(columns.onset), columns.onset) + - allequal(sorted(columns.onset, "numeric"), columns.onset) diff --git a/src/schema/rules/sidecars/func.yaml b/src/schema/rules/sidecars/func.yaml index f3b7e70b69..dbb4d714e0 100644 --- a/src/schema/rules/sidecars/func.yaml +++ b/src/schema/rules/sidecars/func.yaml @@ -58,7 +58,7 @@ MRIFuncTimingParameters: field and that do not have the `SliceTiming` field set to allow for accurate calculation of "acquisition time" issue: - name: VOLUME_TIMING_MISSING_ACQUISITION_DURATION + code: VOLUME_TIMING_MISSING_ACQUISITION_DURATION message: | The field 'VolumeTiming' requires 'AcquisitionDuration' or 'SliceTiming' to be defined. DelayAfterTrigger: recommended diff --git a/src/schema/rules/sidecars/mri.yaml b/src/schema/rules/sidecars/mri.yaml index cc27d676be..c9b94ec83b 100644 --- a/src/schema/rules/sidecars/mri.yaml +++ b/src/schema/rules/sidecars/mri.yaml @@ -27,7 +27,8 @@ MRIHardware: description_addendum: Corresponds to DICOM Tag 0018, 1020 `Software Versions`. HardcopyDeviceSoftwareVersion: deprecated MagneticFieldStrength: - level: recommended, but required for Arterial Spin Labeling + level: recommended + level_addendum: required for Arterial Spin Labeling ReceiveCoilName: recommended ReceiveCoilActiveElements: recommended NumberReceiveCoilActiveElements: optional @@ -170,14 +171,14 @@ PhaseEncodingDirectionReq: PhaseEncodingDirection: level: required issue: - name: PHASE_ENCODING_DIRECTION_MUST_DEFINE - issue: | + code: PHASE_ENCODING_DIRECTION_MUST_DEFINE + message: | You have to define 'PhaseEncodingDirection' for this file. TotalReadoutTime: level: required description_addendum: 3 issue: - name: TOTAL_READOUT_TIME_MUST_DEFINE + code: TOTAL_READOUT_TIME_MUST_DEFINE message: | You have to define 'TotalReadoutTime' for this file. @@ -191,7 +192,7 @@ MRITimingParameters: required if corresponding fieldmap data is present, or the data comes from a multi-echo sequence or Arterial Spin Labeling. 
issue: - name: ECHO_TIME_NOT_DEFINED + code: ECHO_TIME_NOT_DEFINED message: | You must define 'EchoTime' for this file. 'EchoTime' is the echo time (TE) for the acquisition, specified in seconds. Corresponds to DICOM Tag @@ -273,7 +274,7 @@ MRIFlipAngleLookLockerTrue: FlipAngle: level: required issue: - name: LOOK_LOCKER_FLIP_ANGLE_MISSING + code: LOOK_LOCKER_FLIP_ANGLE_MISSING message: | You should define 'FlipAngle' for this file, in case of a LookLocker acquisition. 'FlipAngle' is the diff --git a/tools/schemacode/bidsschematools/render/tables.py b/tools/schemacode/bidsschematools/render/tables.py index b32e982e52..9366ae5725 100644 --- a/tools/schemacode/bidsschematools/render/tables.py +++ b/tools/schemacode/bidsschematools/render/tables.py @@ -528,8 +528,8 @@ def make_subobject_table( The tabulated table as a Markdown string. """ obj = schema.objects[object_name] - required_fields = set(obj.get("required_fields", ())) - recommended_fields = set(obj.get("recommended_fields", ())) + required_fields = set(obj.get("required", ())) + recommended_fields = set(obj.get("recommended", ())) field_info = {} for field in obj.properties: diff --git a/tools/schemacode/bidsschematools/schema.py b/tools/schemacode/bidsschematools/schema.py index ed6b5da5f3..a00eaddac9 100644 --- a/tools/schemacode/bidsschematools/schema.py +++ b/tools/schemacode/bidsschematools/schema.py @@ -164,7 +164,10 @@ def flatten_enums(namespace, inplace=True): namespace = deepcopy(namespace) for struct in _find(namespace, lambda obj: "anyOf" in obj): try: - all_enum = [val for item in struct["anyOf"] for val in item["enum"]] + # Deduplicate because JSON schema validators may not like duplicates + # In the long run, we should get rid of this function and have the rendering + # code handle anyOfs + all_enum = list(dict.fromkeys(val for item in struct["anyOf"] for val in item["enum"])) except KeyError: continue