From f418f8dcefb2373131222ea8a59484876d4bc131 Mon Sep 17 00:00:00 2001 From: Taylor Salo Date: Tue, 29 Mar 2022 12:38:00 -0400 Subject: [PATCH 01/10] Describe object types in the schema. --- src/schema/objects/types.yaml | 44 +++++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) create mode 100644 src/schema/objects/types.yaml diff --git a/src/schema/objects/types.yaml b/src/schema/objects/types.yaml new file mode 100644 index 0000000000..45340413e5 --- /dev/null +++ b/src/schema/objects/types.yaml @@ -0,0 +1,44 @@ +--- +# This file describes the types of objects in the BIDS schema. +# Each object type has its own YAML file describing valid possible values for objects of that type. +associated_data: + name: Associated Data + description: | + Directories that may appear within a dataset directory without following BIDS rules. +columns: + name: Column + description: | + Columns which may appear within tabular data files, such as TSV or TSV.GZ files. +datatypes: + name: Data Type + description: | + A functional group of different types of data. + This is equivalent to the **Data Type** defined under Common Principles. +entities: + name: Entity + description: | + A key-value pair in a BIDS filename. + Valid keys, and the order in which they must appear, is defined within the BIDS schema. +formats: + name: Format + description: | + A regular expression defining valid values of different types. +metadata: + name: Metadata Field + description: | + A field that may be present in a BIDS sidecar JSON file. +modalities: + name: Modality + description: | + The category of brain data recorded by a file. + This is equivalent to the **Modality** defined under Common Principles. +suffixes: + name: Suffix + description: | + An alphanumeric value within BIDS filenames, located after the entity `key-value_` pairs + (thus after the final `_`), right before the **File extension**. + This is equivalent to the **suffix** defined under Common Principles. 
+top_level_files: + name: Top-Level File + description: | + A file which may appear at the top level of a dataset. From 204a025b09053ec240c59686addb551503151c90 Mon Sep 17 00:00:00 2001 From: Taylor Salo Date: Tue, 29 Mar 2022 13:02:12 -0400 Subject: [PATCH 02/10] Try defining the objects' fields. --- src/schema/objects/types.yaml | 87 +++++++++++++++++++++++++++++++++++ 1 file changed, 87 insertions(+) diff --git a/src/schema/objects/types.yaml b/src/schema/objects/types.yaml index 45340413e5..d3ae31e724 100644 --- a/src/schema/objects/types.yaml +++ b/src/schema/objects/types.yaml @@ -5,40 +5,127 @@ associated_data: name: Associated Data description: | Directories that may appear within a dataset directory without following BIDS rules. + definition: + name: string + description: string columns: name: Column description: | Columns which may appear within tabular data files, such as TSV or TSV.GZ files. + definition: + name: string + description: string + type: + - string: + supplementary_fields: + - format + - unit + - enum + - pattern + - number: + supplementary_fields: + - unit + - exclusiveMinimum + - minimum + - exclusiveMaximum + - maximum + - integer: + supplementary_fields: + - unit + - exclusiveMinimum + - minimum + - exclusiveMaximum + - maximum + - boolean datatypes: name: Data Type description: | A functional group of different types of data. This is equivalent to the **Data Type** defined under Common Principles. + definition: + name: string + description: string entities: name: Entity description: | A key-value pair in a BIDS filename. Valid keys, and the order in which they must appear, is defined within the BIDS schema. + definition: + name: string + description: string + type: + - string: + supplementary_fields: + - format + - enum formats: name: Format description: | A regular expression defining valid values of different types. 
+ definition: + name: string + description: string + pattern: string metadata: name: Metadata Field description: | A field that may be present in a BIDS sidecar JSON file. + definition: + name: string + description: string + type: + - string: + supplementary_fields: + - format + - unit + - enum + - pattern + - number: + supplementary_fields: + - unit + - exclusiveMinimum + - minimum + - exclusiveMaximum + - maximum + - integer: + supplementary_fields: + - unit + - exclusiveMinimum + - minimum + - exclusiveMaximum + - maximum + - array: + supplementary_fields: + - minItems + - maxItems + - items + - boolean + - object: + supplementary_fields: + - additionalProperties + - properties modalities: name: Modality description: | The category of brain data recorded by a file. This is equivalent to the **Modality** defined under Common Principles. + definition: + name: string + description: string suffixes: name: Suffix description: | An alphanumeric value within BIDS filenames, located after the entity `key-value_` pairs (thus after the final `_`), right before the **File extension**. This is equivalent to the **suffix** defined under Common Principles. + definition: + name: string + description: string + unit: string top_level_files: name: Top-Level File description: | A file which may appear at the top level of a dataset. + definition: + name: string + description: string From c48b64a1e90949831fcdc1c2c2ec80efcfc047df Mon Sep 17 00:00:00 2001 From: Taylor Salo Date: Wed, 30 Mar 2022 12:44:25 -0400 Subject: [PATCH 03/10] Add extensions. 
--- src/schema/objects/types.yaml | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/src/schema/objects/types.yaml b/src/schema/objects/types.yaml index d3ae31e724..418b34d75c 100644 --- a/src/schema/objects/types.yaml +++ b/src/schema/objects/types.yaml @@ -58,6 +58,18 @@ entities: supplementary_fields: - format - enum +extensions: + name: File Extension + description: | + A portion of the filename after the left-most + period (`.`) preceded by any other alphanumeric. For example, `.gitignore` does + not have a file extension, but the file extension of `test.nii.gz` is `.nii.gz`. + Note that the left-most period is included in the file extension. + + This is equivalent to the **File extension** defined under Common Principles. + definition: + name: string + description: string formats: name: Format description: | From 360d0062cce526a00445ff796101f5a67b926335 Mon Sep 17 00:00:00 2001 From: Taylor Salo Date: Thu, 31 Mar 2022 14:20:04 -0400 Subject: [PATCH 04/10] Update types.yaml --- src/schema/objects/types.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/src/schema/objects/types.yaml b/src/schema/objects/types.yaml index 418b34d75c..ff1127b4a0 100644 --- a/src/schema/objects/types.yaml +++ b/src/schema/objects/types.yaml @@ -53,6 +53,7 @@ entities: definition: name: string description: string + entity: string type: - string: supplementary_fields: From 0732d304fb1b1162681c58617a97e96d46a2624d Mon Sep 17 00:00:00 2001 From: Taylor Salo Date: Wed, 20 Jul 2022 10:18:46 -0400 Subject: [PATCH 05/10] Move the type definition file. 
--- src/schema/{objects => meta}/types.yaml | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename src/schema/{objects => meta}/types.yaml (100%) diff --git a/src/schema/objects/types.yaml b/src/schema/meta/types.yaml similarity index 100% rename from src/schema/objects/types.yaml rename to src/schema/meta/types.yaml From 79c8fd3846f1e2deb91a8d4a12c602dc1f7bf31b Mon Sep 17 00:00:00 2001 From: Taylor Salo Date: Wed, 20 Jul 2022 12:29:58 -0400 Subject: [PATCH 06/10] Fix the yaml file. --- src/schema/meta/types.yaml | 19 +++++++++++++------ tools/schemacode/bidsschematools/schema.py | 8 ++++++++ 2 files changed, 21 insertions(+), 6 deletions(-) diff --git a/src/schema/meta/types.yaml b/src/schema/meta/types.yaml index ff1127b4a0..2694a6eaf3 100644 --- a/src/schema/meta/types.yaml +++ b/src/schema/meta/types.yaml @@ -6,7 +6,7 @@ associated_data: description: | Directories that may appear within a dataset directory without following BIDS rules. definition: - name: string + display_name: string description: string columns: name: Column @@ -14,6 +14,7 @@ columns: Columns which may appear within tabular data files, such as TSV or TSV.GZ files. definition: name: string + display_name: string description: string type: - string: @@ -43,7 +44,8 @@ datatypes: A functional group of different types of data. This is equivalent to the **Data Type** defined under Common Principles. definition: - name: string + value: string + display_name: string description: string entities: name: Entity @@ -52,6 +54,7 @@ entities: Valid keys, and the order in which they must appear, is defined within the BIDS schema. definition: name: string + display_name: string description: string entity: string type: @@ -69,14 +72,15 @@ extensions: This is equivalent to the **File extension** defined under Common Principles. 
definition: - name: string + value: string + display_name: string description: string formats: name: Format description: | A regular expression defining valid values of different types. definition: - name: string + display_name: string description: string pattern: string metadata: @@ -85,6 +89,7 @@ metadata: A field that may be present in a BIDS sidecar JSON file. definition: name: string + display_name: string description: string type: - string: @@ -123,7 +128,7 @@ modalities: The category of brain data recorded by a file. This is equivalent to the **Modality** defined under Common Principles. definition: - name: string + display_name: string description: string suffixes: name: Suffix @@ -132,7 +137,8 @@ suffixes: (thus after the final `_`), right before the **File extension**. This is equivalent to the **suffix** defined under Common Principles. definition: - name: string + value: string + display_name: string description: string unit: string top_level_files: @@ -141,4 +147,5 @@ top_level_files: A file which may appear at the top level of a dataset. definition: name: string + display_name: string description: string diff --git a/tools/schemacode/bidsschematools/schema.py b/tools/schemacode/bidsschematools/schema.py index 54d668d051..73887be90c 100644 --- a/tools/schemacode/bidsschematools/schema.py +++ b/tools/schemacode/bidsschematools/schema.py @@ -74,6 +74,7 @@ def load_schema(schema_path): Schema in dictionary form. """ schema_path = Path(schema_path) + meta_dir = schema_path / "meta/" objects_dir = schema_path / "objects/" rules_dir = schema_path / "rules/" @@ -83,9 +84,16 @@ def load_schema(schema_path): ) schema = {} + schema["meta"] = {} schema["objects"] = {} schema["rules"] = {} + # Load meta definitions. All are present in single files. 
+ for meta_group_file in sorted(meta_dir.glob("*.yaml")): + lgr.debug(f"Loading {meta_group_file.stem} meta definitions.") + dict_ = yaml.safe_load(meta_group_file.read_text()) + schema["meta"][meta_group_file.stem] = dereference_yaml(dict_, dict_) + # Load object definitions. All are present in single files. for object_group_file in sorted(objects_dir.glob("*.yaml")): lgr.debug(f"Loading {object_group_file.stem} objects.") From 2c1bc21109b396608015f4177941e70fa309b947 Mon Sep 17 00:00:00 2001 From: Taylor Salo Date: Mon, 15 Aug 2022 11:55:57 -0400 Subject: [PATCH 07/10] Create test_rules.py --- .../bidsschematools/tests/test_rules.py | 25 +++++++++++++++++++ 1 file changed, 25 insertions(+) create mode 100644 tools/schemacode/bidsschematools/tests/test_rules.py diff --git a/tools/schemacode/bidsschematools/tests/test_rules.py b/tools/schemacode/bidsschematools/tests/test_rules.py new file mode 100644 index 0000000000..fc2d213cef --- /dev/null +++ b/tools/schemacode/bidsschematools/tests/test_rules.py @@ -0,0 +1,25 @@ +"""Simple validation tests on schema rules.""" +TYPES = { + "string": str, + "number": float, + "integer": int, + "boolean": bool, + "object": dict, + "array": list, +} + + +def test_object_definitions(schema_obj): + """Ensure that all object definitions follow the appropriate type definition.""" + for type_name, type_objects in schema_obj["objects"].items(): + type_obj = schema_obj["meta"]["types"][type_name] + type_def = type_obj["definition"] + + for obj, obj_def in type_objects.items(): + print(obj) + print(obj_def) + + for field in type_def.keys(): + ... + + raise Exception() From df0259828ba54abf3111720d7fc12f8594779bb6 Mon Sep 17 00:00:00 2001 From: Taylor Salo Date: Mon, 15 Aug 2022 11:58:28 -0400 Subject: [PATCH 08/10] Move things. 
--- .../bidsschematools/tests/test_rules.py | 40 +++++++++++++++++++ .../schemacode/schemacode/tests/test_rules.py | 40 ------------------- 2 files changed, 40 insertions(+), 40 deletions(-) delete mode 100644 tools/schemacode/schemacode/tests/test_rules.py diff --git a/tools/schemacode/bidsschematools/tests/test_rules.py b/tools/schemacode/bidsschematools/tests/test_rules.py index fc2d213cef..9d50b80b10 100644 --- a/tools/schemacode/bidsschematools/tests/test_rules.py +++ b/tools/schemacode/bidsschematools/tests/test_rules.py @@ -1,4 +1,5 @@ """Simple validation tests on schema rules.""" +"""Simple validation tests on schema rules.""" TYPES = { "string": str, "number": float, @@ -23,3 +24,42 @@ def test_object_definitions(schema_obj): ... raise Exception() + + +def _dict_key_lookup(_dict, key, path=[]): + """Look up any uses of a key in a nested dictionary. + + Adapted from https://stackoverflow.com/a/60377584/2589328. + """ + results = [] + if isinstance(_dict, dict): + if key in _dict: + results.append((path + [key], _dict[key])) + + for k, v in _dict.items(): + results.extend(_dict_key_lookup(v, key, path=path + [k])) + + elif isinstance(_dict, list): + for index, item in enumerate(_dict): + results.extend(_dict_key_lookup(item, key, path=path + [index])) + + return results + + +def test_rule_objects(schema_obj): + """Ensure that all objects referenced in the schema rules are defined in + its object portion. 
+ """ + object_types = list(schema_obj["objects"].keys()) + for object_type in object_types: + type_instances_in_rules = _dict_key_lookup(schema_obj["rules"], object_type) + if not type_instances_in_rules: + continue + + for type_instance in type_instances_in_rules: + path, instance = type_instance + if isinstance(instance, dict): + instance = list(instance.keys()) + + for use in instance: + assert use in schema_obj["objects"][object_type].keys(), path diff --git a/tools/schemacode/schemacode/tests/test_rules.py b/tools/schemacode/schemacode/tests/test_rules.py deleted file mode 100644 index 7938f8a261..0000000000 --- a/tools/schemacode/schemacode/tests/test_rules.py +++ /dev/null @@ -1,40 +0,0 @@ -"""Simple validation tests on schema rules.""" - - -def _dict_key_lookup(_dict, key, path=[]): - """Look up any uses of a key in a nested dictionary. - - Adapted from https://stackoverflow.com/a/60377584/2589328. - """ - results = [] - if isinstance(_dict, dict): - if key in _dict: - results.append((path + [key], _dict[key])) - - for k, v in _dict.items(): - results.extend(_dict_key_lookup(v, key, path=path + [k])) - - elif isinstance(_dict, list): - for index, item in enumerate(_dict): - results.extend(_dict_key_lookup(item, key, path=path + [index])) - - return results - - -def test_rule_objects(schema_obj): - """Ensure that all objects referenced in the schema rules are defined in - its object portion. 
- """ - object_types = list(schema_obj["objects"].keys()) - for object_type in object_types: - type_instances_in_rules = _dict_key_lookup(schema_obj["rules"], object_type) - if not type_instances_in_rules: - continue - - for type_instance in type_instances_in_rules: - path, instance = type_instance - if isinstance(instance, dict): - instance = list(instance.keys()) - - for use in instance: - assert use in schema_obj["objects"][object_type].keys(), path From 6166aba17230b4a6ae6712f8f2571372859ede73 Mon Sep 17 00:00:00 2001 From: Taylor Salo Date: Fri, 19 Aug 2022 16:36:32 -0400 Subject: [PATCH 09/10] Run prettier. --- src/schema/meta/types.yaml | 110 ++++++++++++++++++------------------- 1 file changed, 55 insertions(+), 55 deletions(-) diff --git a/src/schema/meta/types.yaml b/src/schema/meta/types.yaml index 2694a6eaf3..6d896e1212 100644 --- a/src/schema/meta/types.yaml +++ b/src/schema/meta/types.yaml @@ -17,27 +17,27 @@ columns: display_name: string description: string type: - - string: - supplementary_fields: - - format - - unit - - enum - - pattern - - number: - supplementary_fields: - - unit - - exclusiveMinimum - - minimum - - exclusiveMaximum - - maximum - - integer: - supplementary_fields: - - unit - - exclusiveMinimum - - minimum - - exclusiveMaximum - - maximum - - boolean + - string: + supplementary_fields: + - format + - unit + - enum + - pattern + - number: + supplementary_fields: + - unit + - exclusiveMinimum + - minimum + - exclusiveMaximum + - maximum + - integer: + supplementary_fields: + - unit + - exclusiveMinimum + - minimum + - exclusiveMaximum + - maximum + - boolean datatypes: name: Data Type description: | @@ -58,10 +58,10 @@ entities: description: string entity: string type: - - string: - supplementary_fields: - - format - - enum + - string: + supplementary_fields: + - format + - enum extensions: name: File Extension description: | @@ -92,36 +92,36 @@ metadata: display_name: string description: string type: - - string: - 
supplementary_fields: - - format - - unit - - enum - - pattern - - number: - supplementary_fields: - - unit - - exclusiveMinimum - - minimum - - exclusiveMaximum - - maximum - - integer: - supplementary_fields: - - unit - - exclusiveMinimum - - minimum - - exclusiveMaximum - - maximum - - array: - supplementary_fields: - - minItems - - maxItems - - items - - boolean - - object: - supplementary_fields: - - additionalProperties - - properties + - string: + supplementary_fields: + - format + - unit + - enum + - pattern + - number: + supplementary_fields: + - unit + - exclusiveMinimum + - minimum + - exclusiveMaximum + - maximum + - integer: + supplementary_fields: + - unit + - exclusiveMinimum + - minimum + - exclusiveMaximum + - maximum + - array: + supplementary_fields: + - minItems + - maxItems + - items + - boolean + - object: + supplementary_fields: + - additionalProperties + - properties modalities: name: Modality description: | From dfb87672bc25eae9c6fddc4fcb78a654a53fb0c9 Mon Sep 17 00:00:00 2001 From: Taylor Salo Date: Sat, 20 Aug 2022 09:27:05 -0400 Subject: [PATCH 10/10] More improvements, maybe. --- src/schema/meta/types.yaml | 158 +++++++++++------- tools/schemacode/bidsschematools/schema.py | 6 + .../bidsschematools/tests/test_rules.py | 19 ++- 3 files changed, 115 insertions(+), 68 deletions(-) diff --git a/src/schema/meta/types.yaml b/src/schema/meta/types.yaml index 6d896e1212..e95f9cc148 100644 --- a/src/schema/meta/types.yaml +++ b/src/schema/meta/types.yaml @@ -2,14 +2,16 @@ # This file describes the types of objects in the BIDS schema. # Each object type has its own YAML file describing valid possible values for objects of that type. associated_data: - name: Associated Data + name: associated_data + display_name: Associated Data description: | Directories that may appear within a dataset directory without following BIDS rules. 
definition: display_name: string description: string columns: - name: Column + name: columns + display_name: Column description: | Columns which may appear within tabular data files, such as TSV or TSV.GZ files. definition: @@ -17,29 +19,22 @@ columns: display_name: string description: string type: - - string: - supplementary_fields: - - format - - unit - - enum - - pattern - - number: - supplementary_fields: - - unit - - exclusiveMinimum - - minimum - - exclusiveMaximum - - maximum - - integer: - supplementary_fields: - - unit - - exclusiveMinimum - - minimum - - exclusiveMaximum - - maximum - boolean + - $ref: meta.types._integer + - $ref: meta.types._number + - $ref: meta.types._string +common_principles: + name: common_principles + display_name: Common Principles + description: | + Terms defined in the Common Principles section of the BIDS specification. + definition: + name: string + display_name: string + description: string datatypes: - name: Data Type + name: datatypes + display_name: Data Type description: | A functional group of different types of data. This is equivalent to the **Data Type** defined under Common Principles. @@ -48,7 +43,8 @@ datatypes: display_name: string description: string entities: - name: Entity + name: entities + display_name: Entity description: | A key-value pair in a BIDS filename. Valid keys, and the order in which they must appear, is defined within the BIDS schema. @@ -60,10 +56,11 @@ entities: type: - string: supplementary_fields: - - format - - enum + - format: string + - enum: array extensions: - name: File Extension + name: extensions + display_name: File Extension description: | A portion of the filename after the left-most period (`.`) preceded by any other alphanumeric. For example, `.gitignore` does @@ -76,7 +73,8 @@ extensions: display_name: string description: string formats: - name: Format + name: formats + display_name: Format description: | A regular expression defining valid values of different types. 
definition: @@ -84,7 +82,8 @@ formats: description: string pattern: string metadata: - name: Metadata Field + name: metadata + display_name: Metadata Field description: | A field that may be present in a BIDS sidecar JSON file. definition: @@ -92,38 +91,15 @@ metadata: display_name: string description: string type: - - string: - supplementary_fields: - - format - - unit - - enum - - pattern - - number: - supplementary_fields: - - unit - - exclusiveMinimum - - minimum - - exclusiveMaximum - - maximum - - integer: - supplementary_fields: - - unit - - exclusiveMinimum - - minimum - - exclusiveMaximum - - maximum - - array: - supplementary_fields: - - minItems - - maxItems - - items - boolean - - object: - supplementary_fields: - - additionalProperties - - properties + - $ref: meta.types._array + - $ref: meta.types._integer + - $ref: meta.types._number + - $ref: meta.types._object + - $ref: meta.types._string modalities: - name: Modality + name: modalities + display_name: Modality description: | The category of brain data recorded by a file. This is equivalent to the **Modality** defined under Common Principles. @@ -131,7 +107,8 @@ modalities: display_name: string description: string suffixes: - name: Suffix + name: suffixes + display_name: Suffix description: | An alphanumeric value within BIDS filenames, located after the entity `key-value_` pairs (thus after the final `_`), right before the **File extension**. @@ -142,10 +119,69 @@ suffixes: description: string unit: string top_level_files: - name: Top-Level File + name: top_level_files + display_name: Top-Level File description: | A file which may appear at the top level of a dataset. 
definition: name: string display_name: string description: string +_string: + string: + supplementary_fields: + - format + - unit + - enum + - pattern +_number: + number: + supplementary_fields: + - unit: string + - exclusiveMinimum: + anyOf: + - number + - integer + - minimum: + anyOf: + - number + - integer + - exclusiveMaximum: + anyOf: + - number + - integer + - maximum: + anyOf: + - number + - integer +_integer: + integer: + supplementary_fields: + - unit: string + - exclusiveMinimum: + anyOf: + - number + - integer + - minimum: + anyOf: + - number + - integer + - exclusiveMaximum: + anyOf: + - number + - integer + - maximum: + anyOf: + - number + - integer +_array: + array: + supplementary_fields: + - minItems: integer + - maxItems: integer + - items: object +_object: + object: + supplementary_fields: + - additionalProperties: object + - properties: object diff --git a/tools/schemacode/bidsschematools/schema.py b/tools/schemacode/bidsschematools/schema.py index b30acd6689..dc4ba2e65b 100644 --- a/tools/schemacode/bidsschematools/schema.py +++ b/tools/schemacode/bidsschematools/schema.py @@ -222,12 +222,18 @@ def load_schema(schema_path=None): """ if schema_path is None: schema_path = utils.get_schema_path() + schema = Namespace.from_directory(Path(schema_path)) + if not schema.objects: raise ValueError(f"objects subdirectory path not found in {schema_path}") + if not schema.rules: raise ValueError(f"rules subdirectory path not found in {schema_path}") + if not schema.meta: + raise ValueError(f"meta subdirectory path not found in {schema_path}") + dereferenced = dereference_mapping(schema, schema) return Namespace.build(dereferenced) diff --git a/tools/schemacode/bidsschematools/tests/test_rules.py b/tools/schemacode/bidsschematools/tests/test_rules.py index eea807e160..7d7315fa8d 100644 --- a/tools/schemacode/bidsschematools/tests/test_rules.py +++ b/tools/schemacode/bidsschematools/tests/test_rules.py @@ -14,6 +14,7 @@ } +@pytest.mark.validate_schema def 
test_object_definitions(schema_obj): """Ensure that all object definitions follow the appropriate type definition.""" for type_name, type_objects in schema_obj["objects"].items(): @@ -21,13 +22,17 @@ def test_object_definitions(schema_obj): type_def = type_obj["definition"] for obj, obj_def in type_objects.items(): - print(obj) - print(obj_def) - - for field in type_def.keys(): - ... - - raise Exception() + if "type" in type_def.keys(): + valid_types = [ + thing if isinstance(thing, str) else list(thing.keys())[0] + for thing in type_def["type"] + ] + assert "type" in obj_def.keys(), f"{type_name}, {obj}" + assert obj_def["type"] in valid_types + + for field, value in obj_def.items(): + if field not in type_def.keys(): + pass def _dict_key_lookup(_dict, key, path=[]):