From 8a4ceaa3ba1bf78b2179df9a799a4906a81361de Mon Sep 17 00:00:00 2001 From: Yaroslav Halchenko Date: Tue, 6 Feb 2024 19:45:48 -0500 Subject: [PATCH 01/10] Fix some example paths which no longer correspond --- src/schema/README.md | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/schema/README.md b/src/schema/README.md index 3d5c733714..d325abe387 100644 --- a/src/schema/README.md +++ b/src/schema/README.md @@ -165,20 +165,20 @@ references (the cases in which they are used will be presented later): and the references inside `GeneticLevel.anyOf` indicate that there may be a single such value or a list of values. -1. In `rules.datatypes.derivatives.common_derivatives`: +1. In [`rules.files.deriv.preprocessed_data`](./rules/files/deriv/preprocessed_data.yaml): ```YAML anat_nonparametric_common: - $ref: rules.datatypes.anat.nonparametric + $ref: rules.files.raw.anat.nonparametric entities: - $ref: rules.datatypes.anat.nonparametric.entities + $ref: rules.files.raw.anat.nonparametric.entities space: optional description: optional ``` Here, the derivative datatype rule starts by copying the raw datatype rule - `rules.datatypes.anat.nonparametric`. + `rules.files.deriv.anat.nonparametric`. It then *overrides* the `entities` portion of that rule with a new object. To *extend* the original `entities`, it again begins - by referencing `rules.datatypes.anat.nonparametric.entities`, + by referencing `rules.files.raw.anat.nonparametric.entities`, and adding the new entities `space` and `description`. 
### Expressions From f171bd5ec23d1873b49689598a4de4abe7356109 Mon Sep 17 00:00:00 2001 From: Yaroslav Halchenko Date: Tue, 6 Feb 2024 19:58:44 -0500 Subject: [PATCH 02/10] skip example --- tools/no-bad-schema-paths.sh | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) create mode 100755 tools/no-bad-schema-paths.sh diff --git a/tools/no-bad-schema-paths.sh b/tools/no-bad-schema-paths.sh new file mode 100755 index 0000000000..77ab559e55 --- /dev/null +++ b/tools/no-bad-schema-paths.sh @@ -0,0 +1,21 @@ +#!/bin/bash + +set -eu -o pipefail + +cd `readlink -f "$0" | xargs dirname`/../src/schema + +grep -oE '(://)?([-_A-Za-z]+\.)+[-_A-Za-z]+' README.md \ + | grep -v -e :// -e '\.\(md\|html\|json\|tsv\|yaml\)$' \ + | grep -e '^\(meta\|objects\|rules\)' \ + | grep -v 'objects.metadata.OtherObjectName' \ + | sort | uniq | \ + while read p; do + # filepath=${path//.//}; + #echo "$filepath" + #ls -ld "$filepath"* || echo "nope" + #echo -n "$path: " + v=$(jq ".$p" < ../../../bids-schema/versions/master/schema.json | grep -v '^null$' || :) + if [ -z "$v" ]; then + echo "$p: not reachable" + fi + done From 6269e5928307de8df064e54cbfe2aa94810049f7 Mon Sep 17 00:00:00 2001 From: Yaroslav Halchenko Date: Tue, 6 Feb 2024 19:59:19 -0500 Subject: [PATCH 03/10] fixup the fixup --- src/schema/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/schema/README.md b/src/schema/README.md index d325abe387..6dca36157f 100644 --- a/src/schema/README.md +++ b/src/schema/README.md @@ -175,7 +175,7 @@ references (the cases in which they are used will be presented later): description: optional ``` Here, the derivative datatype rule starts by copying the raw datatype rule - `rules.files.deriv.anat.nonparametric`. + `rules.files.raw.anat.nonparametric`. It then *overrides* the `entities` portion of that rule with a new object. 
To *extend* the original `entities`, it again begins by referencing `rules.files.raw.anat.nonparametric.entities`, From 61ff3b85a14d11bd7603bb9e5d1533b0a3e3c2c7 Mon Sep 17 00:00:00 2001 From: Yaroslav Halchenko Date: Tue, 6 Feb 2024 20:00:02 -0500 Subject: [PATCH 04/10] "Fix" example to correspond to current situation May be another simpler example should be chosen? --- src/schema/README.md | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/src/schema/README.md b/src/schema/README.md index 6dca36157f..4b8dd6c6e3 100644 --- a/src/schema/README.md +++ b/src/schema/README.md @@ -136,18 +136,20 @@ with the object being referenced. The following two prototypical examples are presented to clarify the semantics of references (the cases in which they are used will be presented later): -1. In `objects.metadata`: +1. In `objects.enums`: ```YAML _GeneticLevelEnum: type: string enum: - - Genetic - - Genomic - - Epigenomic - - Transcriptomic - - Metabolomic - - Proteomic - + - $ref: objects.enums.Genetic.value + - $ref: objects.enums.Genomic.value + - $ref: objects.enums.Epigenomic.value + - $ref: objects.enums.Transcriptomic.value + - $ref: objects.enums.Metabolomic.value + - $ref: objects.enums.Proteomic.value + ``` + and in `objects.metadata`: + ```YAML GeneticLevel: name: GeneticLevel display_name: Genetic Level @@ -156,13 +158,13 @@ references (the cases in which they are used will be presented later): Values MUST be one of `"Genetic"`, `"Genomic"`, `"Epigenomic"`, `"Transcriptomic"`, `"Metabolomic"`, or `"Proteomic"`. 
anyOf: - - $ref: objects.metadata._GeneticLevelEnum + - $ref: objects.enums._GeneticLevelEnum - type: array items: - $ref: objects.metadata._GeneticLevelEnum + $ref: objects.enums._GeneticLevelEnum ``` Here `_GeneticLevelEnum` is used to describe the valid values of `GeneticLevel`, - and the references inside `GeneticLevel.anyOf` indicate that there may be a single + which are in turn references to individual values, and the references inside `GeneticLevel.anyOf` indicate that there may be a single such value or a list of values. 1. In [`rules.files.deriv.preprocessed_data`](./rules/files/deriv/preprocessed_data.yaml): From 242ce05ddbfe32e5d837b3ebf9187ec0370719e5 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 7 Feb 2024 01:02:19 +0000 Subject: [PATCH 05/10] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- tools/no-bad-schema-paths.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/no-bad-schema-paths.sh b/tools/no-bad-schema-paths.sh index 77ab559e55..6bee4e3dcc 100755 --- a/tools/no-bad-schema-paths.sh +++ b/tools/no-bad-schema-paths.sh @@ -9,8 +9,8 @@ grep -oE '(://)?([-_A-Za-z]+\.)+[-_A-Za-z]+' README.md \ | grep -e '^\(meta\|objects\|rules\)' \ | grep -v 'objects.metadata.OtherObjectName' \ | sort | uniq | \ - while read p; do - # filepath=${path//.//}; + while read p; do + # filepath=${path//.//}; #echo "$filepath" #ls -ld "$filepath"* || echo "nope" #echo -n "$path: " From 4a09bbd8e65f95ec4753650ab434ff0ab784bf73 Mon Sep 17 00:00:00 2001 From: Yaroslav Halchenko Date: Tue, 6 Feb 2024 20:29:44 -0500 Subject: [PATCH 06/10] Add missing (gzip, ome, tiff) context objects --- src/schema/README.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/schema/README.md b/src/schema/README.md index 4b8dd6c6e3..d801314595 100644 --- a/src/schema/README.md +++ b/src/schema/README.md @@ -231,7 +231,10 @@ which 
(currently) contains at the top level: - `associations`: associated files, discovered by the inheritance principle - `columns`: the columns in the current TSV file - `json`: the contents of the current JSON file +- `gzip`: the contents of the current file's GZIP header - `nifti_header`: selected contents of the current NIfTI file's header +- `ome`: the contents of the current OME-XML metadata +- `tiff`: the contents of the current TIFF file's header Some of these are strings, while others are nested objects. These are to be populated by an *interpreter* of the schema, From 66edbabf645f149da4dea8c9400f88eb13f30310 Mon Sep 17 00:00:00 2001 From: Yaroslav Halchenko Date: Thu, 7 Mar 2024 17:37:12 -0500 Subject: [PATCH 07/10] Make helper to check paths in example to take arg to point to schema.org + add it to RTD workflow --- readthedocs.yml | 1 + tools/no-bad-schema-paths.sh | 4 +++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/readthedocs.yml b/readthedocs.yml index cc3f461fc7..207c31a23a 100644 --- a/readthedocs.yml +++ b/readthedocs.yml @@ -7,6 +7,7 @@ build: jobs: pre_build: - bst -v export --output src/schema.json + - tools/no-bad-schema-paths.sh src/schema.json # README.md might need fixing mkdocs: configuration: mkdocs.yml diff --git a/tools/no-bad-schema-paths.sh b/tools/no-bad-schema-paths.sh index 6bee4e3dcc..1825036f42 100755 --- a/tools/no-bad-schema-paths.sh +++ b/tools/no-bad-schema-paths.sh @@ -2,6 +2,8 @@ set -eu -o pipefail +schema_json=$(readlink -f "$1") + cd `readlink -f "$0" | xargs dirname`/../src/schema grep -oE '(://)?([-_A-Za-z]+\.)+[-_A-Za-z]+' README.md \ @@ -14,7 +16,7 @@ grep -oE '(://)?([-_A-Za-z]+\.)+[-_A-Za-z]+' README.md \ #echo "$filepath" #ls -ld "$filepath"* || echo "nope" #echo -n "$path: " - v=$(jq ".$p" < ../../../bids-schema/versions/master/schema.json | grep -v '^null$' || :) + v=$(jq ".$p" < "$schema_json" | grep -v '^null$' || :) if [ -z "$v" ]; then echo "$p: not reachable" fi From 
8de3f81c6773f38eb931152ffc7dc45bbc7bb787 Mon Sep 17 00:00:00 2001 From: Yaroslav Halchenko Date: Thu, 7 Mar 2024 17:50:22 -0500 Subject: [PATCH 08/10] Install jq in RTD --- readthedocs.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/readthedocs.yml b/readthedocs.yml index 207c31a23a..205236c485 100644 --- a/readthedocs.yml +++ b/readthedocs.yml @@ -7,6 +7,7 @@ build: jobs: pre_build: - bst -v export --output src/schema.json + - sudo apt install jq - tools/no-bad-schema-paths.sh src/schema.json # README.md might need fixing mkdocs: From 858fc54a311d5efd54a3848a18e81a7048ddbe3a Mon Sep 17 00:00:00 2001 From: Yaroslav Halchenko Date: Thu, 7 Mar 2024 17:50:36 -0500 Subject: [PATCH 09/10] Make script actually exit with non-0 if anything is unreachable --- tools/no-bad-schema-paths.sh | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/tools/no-bad-schema-paths.sh b/tools/no-bad-schema-paths.sh index 1825036f42..5805245a44 100755 --- a/tools/no-bad-schema-paths.sh +++ b/tools/no-bad-schema-paths.sh @@ -4,20 +4,26 @@ set -eu -o pipefail schema_json=$(readlink -f "$1") -cd `readlink -f "$0" | xargs dirname`/../src/schema +cd "$(dirname "$(readlink -f "$0")")/../src/schema" + +# Create a temporary file and ensure it gets deleted on exit +tmpfile=$(mktemp) +trap 'rm -f "$tmpfile"' EXIT grep -oE '(://)?([-_A-Za-z]+\.)+[-_A-Za-z]+' README.md \ | grep -v -e :// -e '\.\(md\|html\|json\|tsv\|yaml\)$' \ | grep -e '^\(meta\|objects\|rules\)' \ | grep -v 'objects.metadata.OtherObjectName' \ | sort | uniq | \ - while read p; do - # filepath=${path//.//}; - #echo "$filepath" - #ls -ld "$filepath"* || echo "nope" - #echo -n "$path: " - v=$(jq ".$p" < "$schema_json" | grep -v '^null$' || :) - if [ -z "$v" ]; then - echo "$p: not reachable" + while IFS= read -r p; do + v=$(jq ".$p" < "$schema_json" | grep -v '^null$' || echo "fail") + if [ -z "$v" ] || [ "$v" = "fail" ]; then + echo "$p: not reachable" >> "$tmpfile" fi done + +# Check if the 
temporary file is empty +if [ -s "$tmpfile" ]; then + cat "$tmpfile" # Display the not reachable paths + exit 1 +fi From 1c0edcde1a61d68535a9944f9565ac0a14522a18 Mon Sep 17 00:00:00 2001 From: Yaroslav Halchenko Date: Thu, 7 Mar 2024 18:00:20 -0500 Subject: [PATCH 10/10] list jq in apt_packages --- readthedocs.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/readthedocs.yml b/readthedocs.yml index 205236c485..5611c59505 100644 --- a/readthedocs.yml +++ b/readthedocs.yml @@ -2,12 +2,13 @@ version: 2 build: os: ubuntu-22.04 + apt_packages: + - jq tools: python: "3.11" jobs: pre_build: - bst -v export --output src/schema.json - - sudo apt install jq - tools/no-bad-schema-paths.sh src/schema.json # README.md might need fixing mkdocs: