From 2fc8982ee2411633e08dce376cd18027f1273ed8 Mon Sep 17 00:00:00 2001
From: Yaroslav Halchenko <debian@onerussian.com>
Date: Wed, 13 Mar 2024 11:06:05 -0400
Subject: [PATCH] Fix up examples in src/schema/README.md to not use outdated
 schema paths (#1698)

* Fix some example paths which no longer correspond

* skip example

* fixup the fixup

* "Fix" example to correspond to current situation

Maybe another, simpler example should be chosen?

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* Add missing (gzip, ome, tiff) context objects

* Make the helper that checks paths in the examples take an argument pointing to schema.json + add it to the RTD workflow

* Install jq in RTD

* Make script actually exit with non-0 if anything is unreachable

* list jq in apt_packages

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
---
 readthedocs.yml              |  3 +++
 src/schema/README.md         | 37 ++++++++++++++++++++----------------
 tools/no-bad-schema-paths.sh | 29 ++++++++++++++++++++++++++++
 3 files changed, 53 insertions(+), 16 deletions(-)
 create mode 100755 tools/no-bad-schema-paths.sh

diff --git a/readthedocs.yml b/readthedocs.yml
index cc3f461fc7..5611c59505 100644
--- a/readthedocs.yml
+++ b/readthedocs.yml
@@ -2,11 +2,14 @@ version: 2
 
 build:
   os: ubuntu-22.04
+  apt_packages:
+    - jq
   tools:
     python: "3.11"
   jobs:
     pre_build:
       - bst -v export --output src/schema.json
+      - tools/no-bad-schema-paths.sh src/schema.json  # README.md might need fixing
 
 mkdocs:
   configuration: mkdocs.yml
diff --git a/src/schema/README.md b/src/schema/README.md
index 3d5c733714..d801314595 100644
--- a/src/schema/README.md
+++ b/src/schema/README.md
@@ -136,18 +136,20 @@ with the object being referenced.
 The following two prototypical examples are presented to clarify the semantics of
 references (the cases in which they are used will be presented later):
 
-1.  In `objects.metadata`:
+1.  In `objects.enums`:
     ```YAML
     _GeneticLevelEnum:
       type: string
       enum:
-        - Genetic
-        - Genomic
-        - Epigenomic
-        - Transcriptomic
-        - Metabolomic
-        - Proteomic
-
+        - $ref: objects.enums.Genetic.value
+        - $ref: objects.enums.Genomic.value
+        - $ref: objects.enums.Epigenomic.value
+        - $ref: objects.enums.Transcriptomic.value
+        - $ref: objects.enums.Metabolomic.value
+        - $ref: objects.enums.Proteomic.value
+    ```
+    and in `objects.metadata`:
+    ```YAML
     GeneticLevel:
       name: GeneticLevel
       display_name: Genetic Level
@@ -156,29 +158,29 @@ references (the cases in which they are used will be presented later):
         Values MUST be one of `"Genetic"`, `"Genomic"`, `"Epigenomic"`,
         `"Transcriptomic"`, `"Metabolomic"`, or `"Proteomic"`.
       anyOf:
-        - $ref: objects.metadata._GeneticLevelEnum
+        - $ref: objects.enums._GeneticLevelEnum
         - type: array
           items:
-            $ref: objects.metadata._GeneticLevelEnum
+            $ref: objects.enums._GeneticLevelEnum
     ```
     Here `_GeneticLevelEnum` is used to describe the valid values of `GeneticLevel`,
-    and the references inside `GeneticLevel.anyOf` indicate that there may be a single
+    which are in turn references to individual values, and the references inside `GeneticLevel.anyOf` indicate that there may be a single
     such value or a list of values.
 
-1.  In `rules.datatypes.derivatives.common_derivatives`:
+1.  In [`rules.files.deriv.preprocessed_data`](./rules/files/deriv/preprocessed_data.yaml):
     ```YAML
     anat_nonparametric_common:
-      $ref: rules.datatypes.anat.nonparametric
+      $ref: rules.files.raw.anat.nonparametric
       entities:
-        $ref: rules.datatypes.anat.nonparametric.entities
+        $ref: rules.files.raw.anat.nonparametric.entities
         space: optional
         description: optional
     ```
     Here, the derivative datatype rule starts by copying the raw datatype rule
-    `rules.datatypes.anat.nonparametric`.
+    `rules.files.raw.anat.nonparametric`.
     It then *overrides* the `entities` portion of that rule with a new object.
     To *extend* the original `entities`, it again begins
-    by referencing `rules.datatypes.anat.nonparametric.entities`,
+    by referencing `rules.files.raw.anat.nonparametric.entities`,
     and adding the new entities `space` and `description`.
 
 ### Expressions
@@ -229,7 +231,10 @@ which (currently) contains at the top level:
 -   `associations`: associated files, discovered by the inheritance principle
 -   `columns`: the columns in the current TSV file
 -   `json`: the contents of the current JSON file
+-   `gzip`: the contents of the current file's GZIP header
 -   `nifti_header`: selected contents of the current NIfTI file's header
+-   `ome`: the contents of the current OME-XML metadata
+-   `tiff`: the contents of the current TIFF file's header
 
 Some of these are strings, while others are nested objects.
 These are to be populated by an *interpreter* of the schema,
diff --git a/tools/no-bad-schema-paths.sh b/tools/no-bad-schema-paths.sh
new file mode 100755
index 0000000000..5805245a44
--- /dev/null
+++ b/tools/no-bad-schema-paths.sh
@@ -0,0 +1,29 @@
+#!/bin/bash
+
+set -eu -o pipefail
+
+schema_json=$(readlink -f "$1")
+
+cd "$(dirname "$(readlink -f "$0")")/../src/schema"
+
+# Create a temporary file and ensure it gets deleted on exit
+tmpfile=$(mktemp)
+trap 'rm -f "$tmpfile"' EXIT
+
+grep -oE '(://)?([-_A-Za-z]+\.)+[-_A-Za-z]+' README.md \
+    | grep -v -e :// -e '\.\(md\|html\|json\|tsv\|yaml\)$' \
+    | grep -e '^\(meta\|objects\|rules\)' \
+    | grep -v 'objects.metadata.OtherObjectName' \
+    | sort | uniq | \
+    while IFS= read -r p; do
+        v=$(jq ".$p" < "$schema_json" | grep -v '^null$' || echo "fail")
+        if [ -z "$v" ] || [ "$v" = "fail" ]; then
+            echo "$p: not reachable" >> "$tmpfile"
+        fi
+    done
+
+# Check if the temporary file is empty
+if [ -s "$tmpfile" ]; then
+    cat "$tmpfile" # Display the not reachable paths
+    exit 1
+fi