From 7262fdae16bf62beccf9ddbe6ced9d37352fe6bd Mon Sep 17 00:00:00 2001 From: Taylor Salo Date: Fri, 24 Sep 2021 14:08:58 -0400 Subject: [PATCH 01/30] Draft formats file. --- src/schema/formats.yaml | 66 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 66 insertions(+) create mode 100644 src/schema/formats.yaml diff --git a/src/schema/formats.yaml b/src/schema/formats.yaml new file mode 100644 index 0000000000..6d6cf9bafd --- /dev/null +++ b/src/schema/formats.yaml @@ -0,0 +1,66 @@ +--- +# This file defines valid patterns for different formats +# Entity patterns +label: + description: | + Freeform labels without special characters. + pattern: ^[0-9a-zA-Z]+$ +index: + description: | + Non-negative, non-zero integers, optionally prefixed with leading zeros for sortability. + pattern: ^[0-9]+$ +# Metadata types +string: + description: | + A basic string type (not a specific format). + This should allow any free-form string *except* "n/a". + pattern: ^(?!(n/a)$).*$ +integer: + description: | + An integer. + pattern: ^[+-]?\d+$ +number: + description: | + A number. + pattern: ^[+-]?([0-9]+([.][0-9]*)?|[.][0-9]+)$ +boolean: + description: | + A boolean. + pattern: ^(true|false)$ +# String patterns +date: + description: | + A date. + YYYY-MM-DD[Z] + pattern: ^[0-9]{4}-[0-9]{2}-[0-9]{2}?[A-Z]*$ +datetime: + description: | + A datetime. + YYYY-MM-DDThh:mm:ss[.000000][Z] + pattern: ^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}?(\.[0-9]{1,6})?([A-Z]{3,4})$ +unit: + description: | + A unit. + Currently this matches any string that isn't "n/a" + TODO: Somehow reference the actual unit options in the Units appendix. + pattern: ^(?!(n/a)$).*$ +dataset_relative: + description: | + A path to a file, relative to the dataset folder. + + The validation for this format is minimal. + pattern: ^[0-9a-zA-Z/_-\.]+$ +participant_relative: + description: | + A path to a file, relative to the participant's folder in the dataset. + + The validation for this format is minimal. + pattern: ^[0-9a-zA-Z/_-\.]+$ +uri: + description: | + A uniform resource indicator. + pattern: ^[a-zA-Z]+:[0-9a-zA-Z/_-\.]+$ +bids_uri: + description: | + A BIDS uniform resource indicator. + pattern: ^bids:[0-9a-zA-Z/_-\.]+$ From f9a5d15a6939b6958b8dd52ff3f4f58327247d1c Mon Sep 17 00:00:00 2001 From: Taylor Salo Date: Fri, 24 Sep 2021 14:12:17 -0400 Subject: [PATCH 02/30] Limit time zones to between 3 and 4 characters. --- src/schema/formats.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/schema/formats.yaml b/src/schema/formats.yaml index 6d6cf9bafd..b31b6a048e 100644 --- a/src/schema/formats.yaml +++ b/src/schema/formats.yaml @@ -32,7 +32,7 @@ date: description: | A date. YYYY-MM-DD[Z] - pattern: ^[0-9]{4}-[0-9]{2}-[0-9]{2}?[A-Z]*$ + pattern: ^[0-9]{4}-[0-9]{2}-[0-9]{2}?([A-Z]{3,4})$ datetime: description: | A datetime. From 0d053d8be7092d864a0443532a99f99c6fd17e1c Mon Sep 17 00:00:00 2001 From: Taylor Salo Date: Fri, 24 Sep 2021 14:17:07 -0400 Subject: [PATCH 03/30] Add stimuli_relative format. --- src/schema/formats.yaml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/schema/formats.yaml b/src/schema/formats.yaml index b31b6a048e..d09f905b8e 100644 --- a/src/schema/formats.yaml +++ b/src/schema/formats.yaml @@ -44,6 +44,12 @@ unit: Currently this matches any string that isn't "n/a" TODO: Somehow reference the actual unit options in the Units appendix. pattern: ^(?!(n/a)$).*$ +stimuli_relative: + description: | + A path to a stimulus file, relative to a `/stimuli` folder somewhere. + + The validation for this format is minimal. + pattern: ^[0-9a-zA-Z/_-\.]+$ dataset_relative: description: | A path to a file, relative to the dataset folder. From f53d806707b3bd3f30a05439fa3604d0cabd776a Mon Sep 17 00:00:00 2001 From: Taylor Salo Date: Fri, 24 Sep 2021 14:36:47 -0400 Subject: [PATCH 04/30] Don't allow absolute paths or problematic start folders. --- src/schema/formats.yaml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/schema/formats.yaml b/src/schema/formats.yaml index d09f905b8e..f504cc2edf 100644 --- a/src/schema/formats.yaml +++ b/src/schema/formats.yaml @@ -49,19 +49,19 @@ stimuli_relative: A path to a stimulus file, relative to a `/stimuli` folder somewhere. The validation for this format is minimal. - pattern: ^[0-9a-zA-Z/_-\.]+$ + pattern: ^(?!/)(?!stimuli/)[0-9a-zA-Z/_-\.]+$ dataset_relative: description: | A path to a file, relative to the dataset folder. The validation for this format is minimal. - pattern: ^[0-9a-zA-Z/_-\.]+$ + pattern: ^(?!/)[0-9a-zA-Z/_-\.]+$ participant_relative: description: | A path to a file, relative to the participant's folder in the dataset. The validation for this format is minimal. - pattern: ^[0-9a-zA-Z/_-\.]+$ + pattern: ^(?!/)(?!sub-)[0-9a-zA-Z/\_\-\.]+$ uri: description: | A uniform resource indicator. From e6aa4b342e81f517450637af08bcf89793aea508 Mon Sep 17 00:00:00 2001 From: Taylor Salo Date: Fri, 24 Sep 2021 14:38:27 -0400 Subject: [PATCH 05/30] Use URI regex. From https://datatracker.ietf.org/doc/html/rfc3986#appendix-B. --- src/schema/formats.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/schema/formats.yaml b/src/schema/formats.yaml index f504cc2edf..6634e29e09 100644 --- a/src/schema/formats.yaml +++ b/src/schema/formats.yaml @@ -65,7 +65,7 @@ participant_relative: uri: description: | A uniform resource indicator. - pattern: ^[a-zA-Z]+:[0-9a-zA-Z/_-\.]+$ + pattern: ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))? bids_uri: description: | A BIDS uniform resource indicator. From 35cf7c2265e07eaf2c6ec1bb9903cb891cd9d2fc Mon Sep 17 00:00:00 2001 From: Taylor Salo Date: Fri, 24 Sep 2021 14:39:07 -0400 Subject: [PATCH 06/30] Fix(?) BIDS URI regex. --- src/schema/formats.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/schema/formats.yaml b/src/schema/formats.yaml index 6634e29e09..beef2660a1 100644 --- a/src/schema/formats.yaml +++ b/src/schema/formats.yaml @@ -69,4 +69,4 @@ uri: bids_uri: description: | A BIDS uniform resource indicator. - pattern: ^bids:[0-9a-zA-Z/_-\.]+$ + pattern: ^bids:[0-9a-zA-Z/\_\-\.]+$ From 40fa1d2e31d2afeffd0912a568fbaf460a0e2e72 Mon Sep 17 00:00:00 2001 From: Taylor Salo Date: Fri, 24 Sep 2021 14:40:00 -0400 Subject: [PATCH 07/30] Update formats.yaml --- src/schema/formats.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/schema/formats.yaml b/src/schema/formats.yaml index beef2660a1..78297e9848 100644 --- a/src/schema/formats.yaml +++ b/src/schema/formats.yaml @@ -49,13 +49,13 @@ stimuli_relative: A path to a stimulus file, relative to a `/stimuli` folder somewhere. The validation for this format is minimal. - pattern: ^(?!/)(?!stimuli/)[0-9a-zA-Z/_-\.]+$ + pattern: ^(?!/)(?!stimuli/)[0-9a-zA-Z/\_\-\.]+$ dataset_relative: description: | A path to a file, relative to the dataset folder. The validation for this format is minimal. - pattern: ^(?!/)[0-9a-zA-Z/_-\.]+$ + pattern: ^(?!/)[0-9a-zA-Z/\_\-\.]+$ participant_relative: description: | A path to a file, relative to the participant's folder in the dataset. From 00aef747ae9552a63866ab827c9516274ee64288 Mon Sep 17 00:00:00 2001 From: Taylor Salo Date: Fri, 24 Sep 2021 14:42:15 -0400 Subject: [PATCH 08/30] Don't allow just zeros in index regex. --- src/schema/formats.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/schema/formats.yaml b/src/schema/formats.yaml index 78297e9848..a1eee86094 100644 --- a/src/schema/formats.yaml +++ b/src/schema/formats.yaml @@ -8,7 +8,7 @@ label: index: description: | Non-negative, non-zero integers, optionally prefixed with leading zeros for sortability. - pattern: ^[0-9]+$ + pattern: ^[0-9]*[1-9]+[0-9]*$ # Metadata types string: description: | From 7143ee8887b50092e17d1f79f60c61c99dd0cbdd Mon Sep 17 00:00:00 2001 From: Taylor Salo Date: Fri, 24 Sep 2021 14:42:54 -0400 Subject: [PATCH 09/30] pattern --> format. --- src/schema/formats.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/schema/formats.yaml b/src/schema/formats.yaml index a1eee86094..3131bdb9af 100644 --- a/src/schema/formats.yaml +++ b/src/schema/formats.yaml @@ -27,7 +27,7 @@ boolean: description: | A boolean. pattern: ^(true|false)$ -# String patterns +# String formats date: description: | A date. From a3fda8e4fa26a1f754bfcb51bf6206cd8fb9d5af Mon Sep 17 00:00:00 2001 From: Taylor Salo Date: Fri, 24 Sep 2021 14:58:42 -0400 Subject: [PATCH 10/30] Document the rules for the different formats. --- src/schema/formats.yaml | 34 +++++++++++++++++++++++++--------- 1 file changed, 25 insertions(+), 9 deletions(-) diff --git a/src/schema/formats.yaml b/src/schema/formats.yaml index 3131bdb9af..3923f8654d 100644 --- a/src/schema/formats.yaml +++ b/src/schema/formats.yaml @@ -8,40 +8,44 @@ label: index: description: | Non-negative, non-zero integers, optionally prefixed with leading zeros for sortability. + An index may not be all zeros. pattern: ^[0-9]*[1-9]+[0-9]*$ # Metadata types string: description: | - A basic string type (not a specific format). + The basic string type (not a specific format). This should allow any free-form string *except* "n/a". pattern: ^(?!(n/a)$).*$ integer: description: | - An integer. + An integer which may be positive or negative. pattern: ^[+-]?\d+$ number: description: | - A number. + A number which may be an integer or float, positive or negative. pattern: ^[+-]?([0-9]+([.][0-9]*)?|[.][0-9]+)$ boolean: description: | A boolean. + Must be either "true" or "false". pattern: ^(true|false)$ # String formats date: description: | - A date. - YYYY-MM-DD[Z] + A date in the form YYYY-MM-DD[Z], + where [Z] is an optional, valid timezone code. pattern: ^[0-9]{4}-[0-9]{2}-[0-9]{2}?([A-Z]{3,4})$ datetime: description: | - A datetime. - YYYY-MM-DDThh:mm:ss[.000000][Z] + A datetime in the form YYYY-MM-DDThh:mm:ss[.000000][Z], + where [.000000] is an optional subsecond resolution between 1 and 6 decimal points, + and [Z] is an optional, valid timezone code. pattern: ^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}?(\.[0-9]{1,6})?([A-Z]{3,4})$ unit: description: | A unit. - Currently this matches any string that isn't "n/a" + Currently this matches any string that isn't "n/a". + TODO: Somehow reference the actual unit options in the Units appendix. pattern: ^(?!(n/a)$).*$ stimuli_relative: @@ -49,18 +53,26 @@ stimuli_relative: A path to a stimulus file, relative to a `/stimuli` folder somewhere. The validation for this format is minimal. + It simply ensures that the value is a string with any characters that may appear in a valid path, + without starting with "/" (an absolute path) or "stimuli/" + (a relative path starting with the stimuli folder, rather than relative to that folder). pattern: ^(?!/)(?!stimuli/)[0-9a-zA-Z/\_\-\.]+$ dataset_relative: description: | A path to a file, relative to the dataset folder. The validation for this format is minimal. + It simply ensures that the value is a string with any characters that may appear in a valid path, + without starting with "/" (an absolute path). pattern: ^(?!/)[0-9a-zA-Z/\_\-\.]+$ participant_relative: description: | A path to a file, relative to the participant's folder in the dataset. The validation for this format is minimal. + It simply ensures that the value is a string with any characters that may appear in a valid path, + without starting with "/" (an absolute path) or "sub/" + (a relative path starting with the participant folder, rather than relative to that folder). pattern: ^(?!/)(?!sub-)[0-9a-zA-Z/\_\-\.]+$ uri: description: | @@ -69,4 +81,8 @@ uri: bids_uri: description: | A BIDS uniform resource indicator. - pattern: ^bids:[0-9a-zA-Z/\_\-\.]+$ + + The validation for this format is minimal. + It simply ensures that the value is a string with any characters that may appear in a valid URI, + starting with "bids:". + pattern: ^bids:[0-9a-zA-Z/#:\?\_\-\.]+$ From dfd30572e57f021936a17dafba4fe5c7245b7a80 Mon Sep 17 00:00:00 2001 From: Taylor Salo Date: Thu, 14 Oct 2021 12:23:28 -0400 Subject: [PATCH 11/30] Remove fancy n/a handling. I was so proud of it too :sob: --- src/schema/formats.yaml | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/src/schema/formats.yaml b/src/schema/formats.yaml index 3923f8654d..72ffa35d78 100644 --- a/src/schema/formats.yaml +++ b/src/schema/formats.yaml @@ -14,8 +14,8 @@ index: string: description: | The basic string type (not a specific format). - This should allow any free-form string *except* "n/a". - pattern: ^(?!(n/a)$).*$ + This should allow any free-form string. + pattern: ^.*$ integer: description: | An integer which may be positive or negative. @@ -44,10 +44,13 @@ datetime: unit: description: | A unit. - Currently this matches any string that isn't "n/a". + SI units in CMIXF formatting are RECOMMENDED + (see [Units](/02-common-principles.html#units)). + + Currently this matches any string. TODO: Somehow reference the actual unit options in the Units appendix. - pattern: ^(?!(n/a)$).*$ + pattern: ^.*$ stimuli_relative: description: | A path to a stimulus file, relative to a `/stimuli` folder somewhere. From 89b37e6f157517da5de1a544b80c380fa29a64bf Mon Sep 17 00:00:00 2001 From: Taylor Salo Date: Thu, 14 Oct 2021 12:24:10 -0400 Subject: [PATCH 12/30] Move formats file into rules folder. --- src/schema/{ => rules}/formats.yaml | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename src/schema/{ => rules}/formats.yaml (100%) diff --git a/src/schema/formats.yaml b/src/schema/rules/formats.yaml similarity index 100% rename from src/schema/formats.yaml rename to src/schema/rules/formats.yaml From 25944e7126e5b3fc19d04a437995b2fe0d05222c Mon Sep 17 00:00:00 2001 From: Taylor Salo Date: Thu, 14 Oct 2021 12:25:08 -0400 Subject: [PATCH 13/30] Actually, the individual formats are really objects. --- src/schema/{rules => objects}/formats.yaml | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename src/schema/{rules => objects}/formats.yaml (100%) diff --git a/src/schema/rules/formats.yaml b/src/schema/objects/formats.yaml similarity index 100% rename from src/schema/rules/formats.yaml rename to src/schema/objects/formats.yaml From 3189a886e5f80427e348f32fc44a0807fe82dd6c Mon Sep 17 00:00:00 2001 From: Taylor Salo Date: Thu, 14 Oct 2021 15:06:10 -0400 Subject: [PATCH 14/30] Add patterns for rrid and time. --- src/schema/objects/formats.yaml | 14 +++++++++++--- src/schema/objects/metadata.yaml | 8 ++++---- 2 files changed, 15 insertions(+), 7 deletions(-) diff --git a/src/schema/objects/formats.yaml b/src/schema/objects/formats.yaml index 72ffa35d78..f8835c8b50 100644 --- a/src/schema/objects/formats.yaml +++ b/src/schema/objects/formats.yaml @@ -32,15 +32,19 @@ boolean: # String formats date: description: | - A date in the form YYYY-MM-DD[Z], + A date in the form `"YYYY-MM-DD[Z]"`, where [Z] is an optional, valid timezone code. pattern: ^[0-9]{4}-[0-9]{2}-[0-9]{2}?([A-Z]{3,4})$ datetime: description: | - A datetime in the form YYYY-MM-DDThh:mm:ss[.000000][Z], + A datetime in the form `"YYYY-MM-DDThh:mm:ss[.000000][Z]"`, where [.000000] is an optional subsecond resolution between 1 and 6 decimal points, and [Z] is an optional, valid timezone code. pattern: ^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}?(\.[0-9]{1,6})?([A-Z]{3,4})$ +time: + description: | + A time in the form `"hh:mm:ss"`. + pattern: ^(?:2[0-3]|[01][0-9]):[0-5][0-9]:[0-5][0-9]$ unit: description: | A unit. @@ -77,10 +81,14 @@ participant_relative: without starting with "/" (an absolute path) or "sub/" (a relative path starting with the participant folder, rather than relative to that folder). pattern: ^(?!/)(?!sub-)[0-9a-zA-Z/\_\-\.]+$ +rrid: + description: | + A [research resource identifier](https://scicrunch.org/resources). + pattern: ^.+_.+$ uri: description: | A uniform resource indicator. - pattern: ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))? + pattern: ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?$ bids_uri: description: | A BIDS uniform resource indicator. diff --git a/src/schema/objects/metadata.yaml b/src/schema/objects/metadata.yaml index a8e572a878..f553df5db5 100644 --- a/src/schema/objects/metadata.yaml +++ b/src/schema/objects/metadata.yaml @@ -1603,7 +1603,7 @@ MolarActivityMeasTime: Time to which molar radioactivity measurement above applies in the default unit `"hh:mm:ss"`. type: string - pattern: ^(?:2[0-3]|[01][0-9]):[0-5][0-9]:[0-5][0-9]$ + format: time MolarActivityUnits: name: MolarActivityUnits description: | @@ -2251,7 +2251,7 @@ SoftwareRRID: Examples: The RRID for Psychtoolbox is 'SCR_002881', and that of PsychoPy is 'SCR_006571'. type: string - pattern: .+_.+ + format: rrid SoftwareVersion: name: SoftwareVersion description: | @@ -2339,7 +2339,7 @@ SpecificRadioactivityMeasTime: Time to which specific radioactivity measurement above applies in the default unit `"hh:mm:ss"`. type: string - pattern: ^(?:2[0-3]|[01][0-9]):[0-5][0-9]:[0-5][0-9]$ + format: time SpecificRadioactivityUnits: name: SpecificRadioactivityUnits description: | @@ -2460,7 +2460,7 @@ TimeZero: in the unit "hh:mm:ss". This should be equal to `InjectionStart` or `ScanStart`. type: string - pattern: ^(?:2[0-3]|[01][0-9]):[0-5][0-9]:[0-5][0-9]$ + format: time TissueOrigin: name: TissueOrigin description: | From a2e56b0b7bcba53d1eea0e4f203f3610caaa7fec Mon Sep 17 00:00:00 2001 From: Taylor Salo Date: Fri, 15 Oct 2021 13:16:38 -0400 Subject: [PATCH 15/30] Be more specific with RRIDs. --- src/schema/objects/formats.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/schema/objects/formats.yaml b/src/schema/objects/formats.yaml index f8835c8b50..70ba356cd1 100644 --- a/src/schema/objects/formats.yaml +++ b/src/schema/objects/formats.yaml @@ -84,7 +84,7 @@ participant_relative: rrid: description: | A [research resource identifier](https://scicrunch.org/resources). - pattern: ^.+_.+$ + pattern: ^RRID:.+_.+$ uri: description: | A uniform resource indicator. From 3a2eac3ca8db99622abf36d8b6fe66cc21dc33d6 Mon Sep 17 00:00:00 2001 From: Taylor Salo Date: Mon, 6 Dec 2021 13:44:37 -0500 Subject: [PATCH 16/30] Remove anchors around patterns and add quotes. --- src/schema/objects/formats.yaml | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/src/schema/objects/formats.yaml b/src/schema/objects/formats.yaml index 70ba356cd1..70467f1d67 100644 --- a/src/schema/objects/formats.yaml +++ b/src/schema/objects/formats.yaml @@ -4,47 +4,47 @@ label: description: | Freeform labels without special characters. - pattern: ^[0-9a-zA-Z]+$ + pattern: "[0-9a-zA-Z]+" index: description: | Non-negative, non-zero integers, optionally prefixed with leading zeros for sortability. An index may not be all zeros. - pattern: ^[0-9]*[1-9]+[0-9]*$ + pattern: "[0-9]*[1-9]+[0-9]*" # Metadata types string: description: | The basic string type (not a specific format). This should allow any free-form string. - pattern: ^.*$ + pattern: ".*" integer: description: | An integer which may be positive or negative. - pattern: ^[+-]?\d+$ + pattern: "[+-]?\d+" number: description: | A number which may be an integer or float, positive or negative. - pattern: ^[+-]?([0-9]+([.][0-9]*)?|[.][0-9]+)$ + pattern: "[+-]?([0-9]+([.][0-9]*)?|[.][0-9]+)" boolean: description: | A boolean. Must be either "true" or "false". - pattern: ^(true|false)$ + pattern: "(true|false)" # String formats date: description: | A date in the form `"YYYY-MM-DD[Z]"`, where [Z] is an optional, valid timezone code. - pattern: ^[0-9]{4}-[0-9]{2}-[0-9]{2}?([A-Z]{3,4})$ + pattern: "[0-9]{4}-[0-9]{2}-[0-9]{2}?([A-Z]{3,4})" datetime: description: | A datetime in the form `"YYYY-MM-DDThh:mm:ss[.000000][Z]"`, where [.000000] is an optional subsecond resolution between 1 and 6 decimal points, and [Z] is an optional, valid timezone code. - pattern: ^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}?(\.[0-9]{1,6})?([A-Z]{3,4})$ + pattern: "[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}?(\.[0-9]{1,6})?([A-Z]{3,4})" time: description: | A time in the form `"hh:mm:ss"`. - pattern: ^(?:2[0-3]|[01][0-9]):[0-5][0-9]:[0-5][0-9]$ + pattern: "(?:2[0-3]|[01][0-9]):[0-5][0-9]:[0-5][0-9]" unit: description: | A unit. @@ -54,7 +54,7 @@ unit: Currently this matches any string. TODO: Somehow reference the actual unit options in the Units appendix. - pattern: ^.*$ + pattern: ".*" stimuli_relative: description: | A path to a stimulus file, relative to a `/stimuli` folder somewhere. @@ -63,7 +63,7 @@ stimuli_relative: It simply ensures that the value is a string with any characters that may appear in a valid path, without starting with "/" (an absolute path) or "stimuli/" (a relative path starting with the stimuli folder, rather than relative to that folder). - pattern: ^(?!/)(?!stimuli/)[0-9a-zA-Z/\_\-\.]+$ + pattern: "(?!/)(?!stimuli/)[0-9a-zA-Z/\_\-\.]+" dataset_relative: description: | A path to a file, relative to the dataset folder. @@ -71,7 +71,7 @@ dataset_relative: The validation for this format is minimal. It simply ensures that the value is a string with any characters that may appear in a valid path, without starting with "/" (an absolute path). - pattern: ^(?!/)[0-9a-zA-Z/\_\-\.]+$ + pattern: "(?!/)[0-9a-zA-Z/\_\-\.]+" participant_relative: description: | A path to a file, relative to the participant's folder in the dataset. @@ -80,15 +80,15 @@ participant_relative: It simply ensures that the value is a string with any characters that may appear in a valid path, without starting with "/" (an absolute path) or "sub/" (a relative path starting with the participant folder, rather than relative to that folder). - pattern: ^(?!/)(?!sub-)[0-9a-zA-Z/\_\-\.]+$ + pattern: "(?!/)(?!sub-)[0-9a-zA-Z/\_\-\.]+" rrid: description: | A [research resource identifier](https://scicrunch.org/resources). - pattern: ^RRID:.+_.+$ + pattern: "RRID:.+_.+" uri: description: | A uniform resource indicator. - pattern: ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?$ + pattern: "(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?" bids_uri: description: | A BIDS uniform resource indicator. @@ -96,4 +96,4 @@ bids_uri: The validation for this format is minimal. It simply ensures that the value is a string with any characters that may appear in a valid URI, starting with "bids:". - pattern: ^bids:[0-9a-zA-Z/#:\?\_\-\.]+$ + pattern: "bids:[0-9a-zA-Z/#:\?\_\-\.]+" From f9c316c05b1e7646b76d43933ad36ed0850a58a8 Mon Sep 17 00:00:00 2001 From: Taylor Salo Date: Mon, 6 Dec 2021 13:48:40 -0500 Subject: [PATCH 17/30] Replace double-quotes with single-quotes. --- src/schema/objects/formats.yaml | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/src/schema/objects/formats.yaml b/src/schema/objects/formats.yaml index 70467f1d67..5323fa532c 100644 --- a/src/schema/objects/formats.yaml +++ b/src/schema/objects/formats.yaml @@ -4,47 +4,47 @@ label: description: | Freeform labels without special characters. - pattern: "[0-9a-zA-Z]+" + pattern: '[0-9a-zA-Z]+' index: description: | Non-negative, non-zero integers, optionally prefixed with leading zeros for sortability. An index may not be all zeros. - pattern: "[0-9]*[1-9]+[0-9]*" + pattern: '[0-9]*[1-9]+[0-9]*' # Metadata types string: description: | The basic string type (not a specific format). This should allow any free-form string. - pattern: ".*" + pattern: '.*' integer: description: | An integer which may be positive or negative. - pattern: "[+-]?\d+" + pattern: '[+-]?\d+' number: description: | A number which may be an integer or float, positive or negative. - pattern: "[+-]?([0-9]+([.][0-9]*)?|[.][0-9]+)" + pattern: '[+-]?([0-9]+([.][0-9]*)?|[.][0-9]+)' boolean: description: | A boolean. Must be either "true" or "false". - pattern: "(true|false)" + pattern: '(true|false)' # String formats date: description: | A date in the form `"YYYY-MM-DD[Z]"`, where [Z] is an optional, valid timezone code. - pattern: "[0-9]{4}-[0-9]{2}-[0-9]{2}?([A-Z]{3,4})" + pattern: '[0-9]{4}-[0-9]{2}-[0-9]{2}?([A-Z]{3,4})' datetime: description: | A datetime in the form `"YYYY-MM-DDThh:mm:ss[.000000][Z]"`, where [.000000] is an optional subsecond resolution between 1 and 6 decimal points, and [Z] is an optional, valid timezone code. - pattern: "[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}?(\.[0-9]{1,6})?([A-Z]{3,4})" + pattern: '[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}?(\.[0-9]{1,6})?([A-Z]{3,4})' time: description: | A time in the form `"hh:mm:ss"`. - pattern: "(?:2[0-3]|[01][0-9]):[0-5][0-9]:[0-5][0-9]" + pattern: '(?:2[0-3]|[01][0-9]):[0-5][0-9]:[0-5][0-9]' unit: description: | A unit. @@ -54,7 +54,7 @@ unit: Currently this matches any string. TODO: Somehow reference the actual unit options in the Units appendix. - pattern: ".*" + pattern: '.*' stimuli_relative: description: | A path to a stimulus file, relative to a `/stimuli` folder somewhere. @@ -63,7 +63,7 @@ stimuli_relative: It simply ensures that the value is a string with any characters that may appear in a valid path, without starting with "/" (an absolute path) or "stimuli/" (a relative path starting with the stimuli folder, rather than relative to that folder). - pattern: "(?!/)(?!stimuli/)[0-9a-zA-Z/\_\-\.]+" + pattern: '(?!/)(?!stimuli/)[0-9a-zA-Z/\_\-\.]+' dataset_relative: description: | A path to a file, relative to the dataset folder. @@ -71,7 +71,7 @@ dataset_relative: The validation for this format is minimal. It simply ensures that the value is a string with any characters that may appear in a valid path, without starting with "/" (an absolute path). - pattern: "(?!/)[0-9a-zA-Z/\_\-\.]+" + pattern: '(?!/)[0-9a-zA-Z/\_\-\.]+' participant_relative: description: | A path to a file, relative to the participant's folder in the dataset. @@ -80,15 +80,15 @@ participant_relative: It simply ensures that the value is a string with any characters that may appear in a valid path, without starting with "/" (an absolute path) or "sub/" (a relative path starting with the participant folder, rather than relative to that folder). - pattern: "(?!/)(?!sub-)[0-9a-zA-Z/\_\-\.]+" + pattern: '(?!/)(?!sub-)[0-9a-zA-Z/\_\-\.]+' rrid: description: | A [research resource identifier](https://scicrunch.org/resources). - pattern: "RRID:.+_.+" + pattern: 'RRID:.+_.+' uri: description: | A uniform resource indicator. - pattern: "(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?" + pattern: '(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?' bids_uri: description: | A BIDS uniform resource indicator. @@ -96,4 +96,4 @@ bids_uri: The validation for this format is minimal. It simply ensures that the value is a string with any characters that may appear in a valid URI, starting with "bids:". - pattern: "bids:[0-9a-zA-Z/#:\?\_\-\.]+" + pattern: 'bids:[0-9a-zA-Z/#:\?\_\-\.]+' From cc502c55d06bd75e13db0198d1f3a27539cd28d3 Mon Sep 17 00:00:00 2001 From: Taylor Salo Date: Wed, 5 Jan 2022 13:06:50 -0500 Subject: [PATCH 18/30] Draft a test for formats. --- .../schemacode/tests/test_schema.py | 29 +++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/tools/schemacode/schemacode/tests/test_schema.py b/tools/schemacode/schemacode/tests/test_schema.py index 5d1400a778..fe289f9271 100644 --- a/tools/schemacode/schemacode/tests/test_schema.py +++ b/tools/schemacode/schemacode/tests/test_schema.py @@ -27,3 +27,32 @@ def test_object_definitions(schema_obj): assert "name" in obj_def.keys(), obj_key assert "description" in obj_def.keys(), obj_key + + +def test_formats(schema_obj): + """Test valid string patterns allowed by the specification.""" + import re + + # Check that valid strings match the search pattern. + GOOD_PATTERNS = { + "label": ["01", "test", "test01", "Test01"], + "index": ["01", "1", "10000", "00001"], + } + for pattern, test_list in GOOD_PATTERNS.items(): + pattern_format = schema_obj["objects"]["formats"][pattern]["pattern"] + search_pattern = "^" + pattern_format + "$" + search = re.compile(search_pattern) + for test_string in test_list: + assert bool(search.fullmatch(test_string)) + + # Check that invalid strings do not match the search pattern. + BAD_PATTERNS = { + "label": ["test_01", "!", "010101-"], + "index": ["test", "0.1", "0-1", "0_1"], + } + for pattern, test_list in BAD_PATTERNS.items(): + pattern_format = schema_obj["objects"]["formats"][pattern]["pattern"] + search_pattern = "^" + pattern_format + "$" + search = re.compile(search_pattern) + for test_string in test_list: + assert not bool(search.fullmatch(test_string)) From 30e73588d33fa9a377156ac7eb3e34e45be0ef39 Mon Sep 17 00:00:00 2001 From: Taylor Salo Date: Wed, 5 Jan 2022 13:09:13 -0500 Subject: [PATCH 19/30] Add names for tests. They probably won't ever be used. --- src/schema/objects/formats.yaml | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/src/schema/objects/formats.yaml b/src/schema/objects/formats.yaml index 5323fa532c..2e7b1fc7d9 100644 --- a/src/schema/objects/formats.yaml +++ b/src/schema/objects/formats.yaml @@ -2,50 +2,60 @@ # This file defines valid patterns for different formats # Entity patterns label: + name: Label description: | Freeform labels without special characters. pattern: '[0-9a-zA-Z]+' index: + name: Index description: | Non-negative, non-zero integers, optionally prefixed with leading zeros for sortability. An index may not be all zeros. pattern: '[0-9]*[1-9]+[0-9]*' # Metadata types string: + name: String description: | The basic string type (not a specific format). This should allow any free-form string. pattern: '.*' integer: + name: Integer description: | An integer which may be positive or negative. pattern: '[+-]?\d+' number: + name: Number description: | A number which may be an integer or float, positive or negative. pattern: '[+-]?([0-9]+([.][0-9]*)?|[.][0-9]+)' boolean: + name: Boolean description: | A boolean. Must be either "true" or "false". pattern: '(true|false)' # String formats date: + name: Date description: | A date in the form `"YYYY-MM-DD[Z]"`, where [Z] is an optional, valid timezone code. pattern: '[0-9]{4}-[0-9]{2}-[0-9]{2}?([A-Z]{3,4})' datetime: + name: Datetime description: | A datetime in the form `"YYYY-MM-DDThh:mm:ss[.000000][Z]"`, where [.000000] is an optional subsecond resolution between 1 and 6 decimal points, and [Z] is an optional, valid timezone code. pattern: '[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}?(\.[0-9]{1,6})?([A-Z]{3,4})' time: + name: Time description: | A time in the form `"hh:mm:ss"`. pattern: '(?:2[0-3]|[01][0-9]):[0-5][0-9]:[0-5][0-9]' unit: + name: A standardized unit description: | A unit. SI units in CMIXF formatting are RECOMMENDED @@ -56,6 +66,7 @@ unit: TODO: Somehow reference the actual unit options in the Units appendix. pattern: '.*' stimuli_relative: + name: Path relative to the stimuli folder description: | A path to a stimulus file, relative to a `/stimuli` folder somewhere. @@ -65,6 +76,7 @@ stimuli_relative: (a relative path starting with the stimuli folder, rather than relative to that folder). pattern: '(?!/)(?!stimuli/)[0-9a-zA-Z/\_\-\.]+' dataset_relative: + name: Path relative to the BIDS dataset folder description: | A path to a file, relative to the dataset folder. @@ -73,6 +85,7 @@ dataset_relative: without starting with "/" (an absolute path). pattern: '(?!/)[0-9a-zA-Z/\_\-\.]+' participant_relative: + name: Path relative to the participant folder description: | A path to a file, relative to the participant's folder in the dataset. @@ -82,14 +95,17 @@ participant_relative: (a relative path starting with the participant folder, rather than relative to that folder). pattern: '(?!/)(?!sub-)[0-9a-zA-Z/\_\-\.]+' rrid: + name: Research resource identifier description: | A [research resource identifier](https://scicrunch.org/resources). pattern: 'RRID:.+_.+' uri: + name: Uniform resource indicator description: | A uniform resource indicator. pattern: '(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?' bids_uri: + name: BIDS uniform resource indicator description: | A BIDS uniform resource indicator. From fa6256ce4eee084241007d69b71ea56c887e89fa Mon Sep 17 00:00:00 2001 From: Taylor Salo Date: Wed, 5 Jan 2022 13:25:36 -0500 Subject: [PATCH 20/30] Add more valid pattern tests. --- .../schemacode/tests/test_schema.py | 27 ++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) diff --git a/tools/schemacode/schemacode/tests/test_schema.py b/tools/schemacode/schemacode/tests/test_schema.py index fe289f9271..3da43814fb 100644 --- a/tools/schemacode/schemacode/tests/test_schema.py +++ b/tools/schemacode/schemacode/tests/test_schema.py @@ -37,13 +37,38 @@ def test_formats(schema_obj): GOOD_PATTERNS = { "label": ["01", "test", "test01", "Test01"], "index": ["01", "1", "10000", "00001"], + "string": ["any string is valid."], + "integer": ["5", "10", "-5", "-10"], + "number": ["5", "3.14", "-5", "-3.14"], + "boolean": ["true", "false"], + "date": ["2022-01-05", "2022-01-05UTC", "2022-50-50"], + "datetime": [ + "2022-01-05T13:16:30", + "2022-01-05T13:16:30.05", + "2022-01-05T13:16:30UTC", + "2022-01-05T13:16:30.05UTC", + ], + "time": ["13:16:30"], + "unit": ["any string is valid."], + "stimuli_relative": ["any/arbitrary/path/file.txt"], + "dataset_relative": ["any/arbitrary/path/file.txt"], + "participant_relative": ["any/arbitrary/path/file.txt"], + "rrid": ["RRID:SCR_017398"], + "uri": ["foo://example.com:8042/over/there?name=ferret#nose"], + "bids_uri": [ + "bids::sub-01/fmap/sub-01_dir-AP_epi.nii.gz", + "bids:ds000001:sub-02/anat/sub-02_T1w.nii.gz", + "bids:myderivatives:sub-03/func/sub-03_task-rest_space-MNI152_bold.nii.gz", + ], } for pattern, test_list in GOOD_PATTERNS.items(): pattern_format = schema_obj["objects"]["formats"][pattern]["pattern"] search_pattern = "^" + pattern_format + "$" search = re.compile(search_pattern) for test_string in test_list: - assert bool(search.fullmatch(test_string)) + assert bool( + search.fullmatch(test_string) + ), f"'{test_string}' is not a valid match for the pattern '{search.pattern}'" # Check that invalid strings do not match the search pattern. BAD_PATTERNS = { From b57d8916ed6347a4cbcdc336f65fec29419db140 Mon Sep 17 00:00:00 2001 From: Taylor Salo Date: Wed, 5 Jan 2022 13:26:29 -0500 Subject: [PATCH 21/30] Fix mistake in date/datetime patterns. --- src/schema/objects/formats.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/schema/objects/formats.yaml b/src/schema/objects/formats.yaml index 2e7b1fc7d9..35057a8281 100644 --- a/src/schema/objects/formats.yaml +++ b/src/schema/objects/formats.yaml @@ -41,14 +41,14 @@ date: description: | A date in the form `"YYYY-MM-DD[Z]"`, where [Z] is an optional, valid timezone code. - pattern: '[0-9]{4}-[0-9]{2}-[0-9]{2}?([A-Z]{3,4})' + pattern: '[0-9]{4}-[0-9]{2}-[0-9]{2}([A-Z]{3,4})?' datetime: name: Datetime description: | A datetime in the form `"YYYY-MM-DDThh:mm:ss[.000000][Z]"`, where [.000000] is an optional subsecond resolution between 1 and 6 decimal points, and [Z] is an optional, valid timezone code. - pattern: '[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}?(\.[0-9]{1,6})?([A-Z]{3,4})' + pattern: '[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}(\.[0-9]{1,6})?([A-Z]{3,4})?' time: name: Time description: | From a9abc49730ddd8787804fac45d18b7569f8dbfeb Mon Sep 17 00:00:00 2001 From: Taylor Salo Date: Wed, 5 Jan 2022 13:28:53 -0500 Subject: [PATCH 22/30] Make time part of datetimes more restrictive. --- src/schema/objects/formats.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/schema/objects/formats.yaml b/src/schema/objects/formats.yaml index 35057a8281..a7d9df6cf4 100644 --- a/src/schema/objects/formats.yaml +++ b/src/schema/objects/formats.yaml @@ -48,7 +48,7 @@ datetime: A datetime in the form `"YYYY-MM-DDThh:mm:ss[.000000][Z]"`, where [.000000] is an optional subsecond resolution between 1 and 6 decimal points, and [Z] is an optional, valid timezone code. - pattern: '[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}(\.[0-9]{1,6})?([A-Z]{3,4})?' + pattern: '[0-9]{4}-[0-9]{2}-[0-9]{2}T(?:2[0-3]|[01][0-9]):[0-5][0-9]:[0-5][0-9](\.[0-9]{1,6})?([A-Z]{3,4})?' time: name: Time description: | From 755285a11a5420174160781b7847f0f363ebd31d Mon Sep 17 00:00:00 2001 From: Taylor Salo Date: Wed, 5 Jan 2022 13:38:08 -0500 Subject: [PATCH 23/30] Make datetimes more robust. --- src/schema/objects/formats.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/schema/objects/formats.yaml b/src/schema/objects/formats.yaml index a7d9df6cf4..ddf26e2eb0 100644 --- a/src/schema/objects/formats.yaml +++ b/src/schema/objects/formats.yaml @@ -41,14 +41,14 @@ date: description: | A date in the form `"YYYY-MM-DD[Z]"`, where [Z] is an optional, valid timezone code. - pattern: '[0-9]{4}-[0-9]{2}-[0-9]{2}([A-Z]{3,4})?' + pattern: '[0-9]{4}-[0-9]{2}-[0-9]{2}([A-Z]{2,4})?' datetime: name: Datetime description: | A datetime in the form `"YYYY-MM-DDThh:mm:ss[.000000][Z]"`, where [.000000] is an optional subsecond resolution between 1 and 6 decimal points, and [Z] is an optional, valid timezone code. - pattern: '[0-9]{4}-[0-9]{2}-[0-9]{2}T(?:2[0-3]|[01][0-9]):[0-5][0-9]:[0-5][0-9](\.[0-9]{1,6})?([A-Z]{3,4})?' + pattern: '[0-9]{4}-[0-9]{2}-[0-9]{2}T(?:2[0-3]|[01][0-9]):[0-5][0-9]:[0-5][0-9](\.[0-9]{1,6})?([A-Z]{2,4})?' time: name: Time description: | From b92aa0f8c2ab9319b62c2cc253fe814dc0ef9f9d Mon Sep 17 00:00:00 2001 From: Taylor Salo Date: Wed, 5 Jan 2022 13:38:17 -0500 Subject: [PATCH 24/30] Add more bad-pattern checks. --- .../schemacode/tests/test_schema.py | 35 ++++++++++++++++++- 1 file changed, 34 insertions(+), 1 deletion(-) diff --git a/tools/schemacode/schemacode/tests/test_schema.py b/tools/schemacode/schemacode/tests/test_schema.py index 3da43814fb..25964c4ca6 100644 --- a/tools/schemacode/schemacode/tests/test_schema.py +++ b/tools/schemacode/schemacode/tests/test_schema.py @@ -44,7 +44,8 @@ def test_formats(schema_obj): "date": ["2022-01-05", "2022-01-05UTC", "2022-50-50"], "datetime": [ "2022-01-05T13:16:30", - "2022-01-05T13:16:30.05", + "2022-01-05T13:16:30.5", + "2022-01-05T13:16:30.000005", "2022-01-05T13:16:30UTC", "2022-01-05T13:16:30.05UTC", ], @@ -74,6 +75,38 @@ def test_formats(schema_obj): BAD_PATTERNS = { "label": ["test_01", "!", "010101-"], "index": ["test", "0.1", "0-1", "0_1"], + "string": [], + "integer": ["3.14", "-3.14", "1.", "-1.", "string", "s1", "1%"], + "number": ["string", "1%"], + "boolean": ["True", "False", "T", "F"], + "date": [ + "05-01-2022", # MM-DD-YYYY or DD-MM-YYYY + "05/01/2022", # MM/DD/YYYY or DD/MM/YYYY + ], + "datetime": [ + "2022-01-05T13:16:30.1000005", # too many decimal points + "2022-01-05T13:16:30U", # time zone too short + "2022-01-05T13:16:30UTCUTC", # time zone too long + "2022-01-05T34:10:10", # invalid time + ], + "time": [ + "34:10:10", # invalid time + ], + "unit": [], + "stimuli_relative": [ + "/path/with/starting/slash/file.txt", + "stimuli/path/file.txt", + ], + "dataset_relative": [ + "/path/with/starting/slash/file.txt", + ], + "participant_relative": [ + "/path/with/starting/slash/file.txt", + "sub-01/path/file.txt", + ], + "rrid": [], + "uri": [], + "bids_uri": [], } for pattern, test_list in BAD_PATTERNS.items(): pattern_format = schema_obj["objects"]["formats"][pattern]["pattern"] From 9e2b6cf4655452a48e91cf4573364981674a2f15 Mon Sep 17 00:00:00 2001 From: Taylor Salo Date: Wed, 5 Jan 2022 13:38:31 -0500 Subject: [PATCH 25/30] Apply suggestions from code review Co-authored-by: Chris Markiewicz --- src/schema/objects/formats.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/schema/objects/formats.yaml b/src/schema/objects/formats.yaml index a7d9df6cf4..bc7c7cc1a2 100644 --- a/src/schema/objects/formats.yaml +++ b/src/schema/objects/formats.yaml @@ -28,7 +28,7 @@ number: name: Number description: | A number which may be an integer or float, positive or negative. - pattern: '[+-]?([0-9]+([.][0-9]*)?|[.][0-9]+)' + pattern: '[+-]?([0-9]+([.][0-9]*)?|[.][0-9]+)([eE][+-]?[0-9]+)?' boolean: name: Boolean description: | @@ -53,7 +53,7 @@ time: name: Time description: | A time in the form `"hh:mm:ss"`. - pattern: '(?:2[0-3]|[01][0-9]):[0-5][0-9]:[0-5][0-9]' + pattern: '(?:2[0-3]|[01]?[0-9]):[0-5][0-9]:[0-5][0-9]' unit: name: A standardized unit description: | From e5e356338d95f1f8ea472d7ec3365eb2347f59e4 Mon Sep 17 00:00:00 2001 From: Taylor Salo Date: Wed, 5 Jan 2022 13:42:02 -0500 Subject: [PATCH 26/30] Test the new pattern changes. --- .../schemacode/tests/test_schema.py | 21 ++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/tools/schemacode/schemacode/tests/test_schema.py b/tools/schemacode/schemacode/tests/test_schema.py index 25964c4ca6..1dd4e26f0d 100644 --- a/tools/schemacode/schemacode/tests/test_schema.py +++ b/tools/schemacode/schemacode/tests/test_schema.py @@ -39,17 +39,28 @@ def test_formats(schema_obj): "index": ["01", "1", "10000", "00001"], "string": ["any string is valid."], "integer": ["5", "10", "-5", "-10"], - "number": ["5", "3.14", "-5", "-3.14"], + "number": [ + "5", # integers are allowed + "3.14", # floats too + "-5", # they can be negative + "-3.14", + "1e3", # scientific notation is allowed + "-2.1E+5", + ], "boolean": ["true", "false"], "date": ["2022-01-05", "2022-01-05UTC", "2022-50-50"], "datetime": [ "2022-01-05T13:16:30", - "2022-01-05T13:16:30.5", - "2022-01-05T13:16:30.000005", - "2022-01-05T13:16:30UTC", + "2022-01-05T13:16:30.5", # subsecond resolution is allowed + "2022-01-05T13:16:30.000005", # up to 6 decimal points + "2022-01-05T13:16:30UTC", # timezones are allowed "2022-01-05T13:16:30.05UTC", ], - "time": ["13:16:30"], + "time": [ + "13:16:30", + "09:00:00", + "9:00:00", # leading zeros are not required for hours + ], "unit": ["any string is valid."], "stimuli_relative": ["any/arbitrary/path/file.txt"], "dataset_relative": ["any/arbitrary/path/file.txt"], From aea7982135bfbbeb3866e749fd9e301f92e0c6de Mon Sep 17 00:00:00 2001 From: Taylor Salo Date: Wed, 2 Feb 2022 14:30:35 -0500 Subject: [PATCH 27/30] Apply suggestions from code review Thanks for the suggestions @yarikoptic. They're great! Co-authored-by: Yaroslav Halchenko --- .../schemacode/schemacode/tests/test_schema.py | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/tools/schemacode/schemacode/tests/test_schema.py b/tools/schemacode/schemacode/tests/test_schema.py index 1dd4e26f0d..b4dbc83357 100644 --- a/tools/schemacode/schemacode/tests/test_schema.py +++ b/tools/schemacode/schemacode/tests/test_schema.py @@ -84,10 +84,10 @@ def test_formats(schema_obj): # Check that invalid strings do not match the search pattern. BAD_PATTERNS = { - "label": ["test_01", "!", "010101-"], + "label": ["test_01", "!", "010101-", "01-01", "-01"], "index": ["test", "0.1", "0-1", "0_1"], "string": [], - "integer": ["3.14", "-3.14", "1.", "-1.", "string", "s1", "1%"], + "integer": ["3.14", "-3.14", "1.", "-1.", "string", "s1", "1%", "one"], "number": ["string", "1%"], "boolean": ["True", "False", "T", "F"], "date": [ @@ -102,6 +102,10 @@ def test_formats(schema_obj): ], "time": [ "34:10:10", # invalid time + "24:00:00", # should be 00:00:00 + "00:60:00", # should be 01:00:00 + "00:00:60", # should be 00:01:00 + "01:23", # lacks either hours or seconds ], "unit": [], "stimuli_relative": [ @@ -115,13 +119,17 @@ def test_formats(schema_obj): "/path/with/starting/slash/file.txt", "sub-01/path/file.txt", ], - "rrid": [], - "uri": [], + "rrid": [ + "RRID:", # empty one + ], + "uri": [ + "ftp://" # lacks anything but protocol + ], "bids_uri": [], } for pattern, test_list in BAD_PATTERNS.items(): pattern_format = schema_obj["objects"]["formats"][pattern]["pattern"] - search_pattern = "^" + pattern_format + "$" + search_pattern = f"^{pattern_format}$" search = re.compile(search_pattern) for test_string in test_list: assert not bool(search.fullmatch(test_string)) From 84061cc8f6964fc161508ea9c5379b567917d6b6 Mon Sep 17 00:00:00 2001 From: Taylor Salo Date: Wed, 2 Feb 2022 14:38:13 -0500 Subject: [PATCH 28/30] Reorder formats alphabetically within each section. --- src/schema/objects/formats.yaml | 116 ++++++++++++++++---------------- 1 file changed, 58 insertions(+), 58 deletions(-) diff --git a/src/schema/objects/formats.yaml b/src/schema/objects/formats.yaml index b62e370476..dab370d2c7 100644 --- a/src/schema/objects/formats.yaml +++ b/src/schema/objects/formats.yaml @@ -1,24 +1,24 @@ --- # This file defines valid patterns for different formats # Entity patterns -label: - name: Label - description: | - Freeform labels without special characters. - pattern: '[0-9a-zA-Z]+' index: name: Index description: | Non-negative, non-zero integers, optionally prefixed with leading zeros for sortability. An index may not be all zeros. pattern: '[0-9]*[1-9]+[0-9]*' +label: + name: Label + description: | + Freeform labels without special characters. + pattern: '[0-9a-zA-Z]+' # Metadata types -string: - name: String +boolean: + name: Boolean description: | - The basic string type (not a specific format). - This should allow any free-form string. - pattern: '.*' + A boolean. + Must be either "true" or "false". + pattern: '(true|false)' integer: name: Integer description: | @@ -29,13 +29,31 @@ number: description: | A number which may be an integer or float, positive or negative. pattern: '[+-]?([0-9]+([.][0-9]*)?|[.][0-9]+)([eE][+-]?[0-9]+)?' -boolean: - name: Boolean +string: + name: String description: | - A boolean. - Must be either "true" or "false". - pattern: '(true|false)' + The basic string type (not a specific format). + This should allow any free-form string. + pattern: '.*' # String formats +bids_uri: + name: BIDS uniform resource indicator + description: | + A BIDS uniform resource indicator. + + The validation for this format is minimal. + It simply ensures that the value is a string with any characters that may appear in a valid URI, + starting with "bids:". + pattern: 'bids:[0-9a-zA-Z/#:\?\_\-\.]+' +dataset_relative: + name: Path relative to the BIDS dataset folder + description: | + A path to a file, relative to the dataset folder. + + The validation for this format is minimal. + It simply ensures that the value is a string with any characters that may appear in a valid path, + without starting with "/" (an absolute path). + pattern: '(?!/)[0-9a-zA-Z/\_\-\.]+' date: name: Date description: | @@ -49,6 +67,31 @@ datetime: where [.000000] is an optional subsecond resolution between 1 and 6 decimal points, and [Z] is an optional, valid timezone code. pattern: '[0-9]{4}-[0-9]{2}-[0-9]{2}T(?:2[0-3]|[01][0-9]):[0-5][0-9]:[0-5][0-9](\.[0-9]{1,6})?([A-Z]{2,4})?' +participant_relative: + name: Path relative to the participant folder + description: | + A path to a file, relative to the participant's folder in the dataset. + + The validation for this format is minimal. + It simply ensures that the value is a string with any characters that may appear in a valid path, + without starting with "/" (an absolute path) or "sub/" + (a relative path starting with the participant folder, rather than relative to that folder). + pattern: '(?!/)(?!sub-)[0-9a-zA-Z/\_\-\.]+' +rrid: + name: Research resource identifier + description: | + A [research resource identifier](https://scicrunch.org/resources). + pattern: 'RRID:.+_.+' +stimuli_relative: + name: Path relative to the stimuli folder + description: | + A path to a stimulus file, relative to a `/stimuli` folder somewhere. + + The validation for this format is minimal. + It simply ensures that the value is a string with any characters that may appear in a valid path, + without starting with "/" (an absolute path) or "stimuli/" + (a relative path starting with the stimuli folder, rather than relative to that folder). + pattern: '(?!/)(?!stimuli/)[0-9a-zA-Z/\_\-\.]+' time: name: Time description: | @@ -65,51 +108,8 @@ unit: TODO: Somehow reference the actual unit options in the Units appendix. pattern: '.*' -stimuli_relative: - name: Path relative to the stimuli folder - description: | - A path to a stimulus file, relative to a `/stimuli` folder somewhere. - - The validation for this format is minimal. - It simply ensures that the value is a string with any characters that may appear in a valid path, - without starting with "/" (an absolute path) or "stimuli/" - (a relative path starting with the stimuli folder, rather than relative to that folder). - pattern: '(?!/)(?!stimuli/)[0-9a-zA-Z/\_\-\.]+' -dataset_relative: - name: Path relative to the BIDS dataset folder - description: | - A path to a file, relative to the dataset folder. - - The validation for this format is minimal. - It simply ensures that the value is a string with any characters that may appear in a valid path, - without starting with "/" (an absolute path). - pattern: '(?!/)[0-9a-zA-Z/\_\-\.]+' -participant_relative: - name: Path relative to the participant folder - description: | - A path to a file, relative to the participant's folder in the dataset. - - The validation for this format is minimal. - It simply ensures that the value is a string with any characters that may appear in a valid path, - without starting with "/" (an absolute path) or "sub/" - (a relative path starting with the participant folder, rather than relative to that folder). - pattern: '(?!/)(?!sub-)[0-9a-zA-Z/\_\-\.]+' -rrid: - name: Research resource identifier - description: | - A [research resource identifier](https://scicrunch.org/resources). - pattern: 'RRID:.+_.+' uri: name: Uniform resource indicator description: | A uniform resource indicator. pattern: '(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?' -bids_uri: - name: BIDS uniform resource indicator - description: | - A BIDS uniform resource indicator. - - The validation for this format is minimal. - It simply ensures that the value is a string with any characters that may appear in a valid URI, - starting with "bids:". - pattern: 'bids:[0-9a-zA-Z/#:\?\_\-\.]+' From df0dd34b78ff8e8182bd20814ed21b4384797893 Mon Sep 17 00:00:00 2001 From: Taylor Salo Date: Wed, 2 Feb 2022 14:42:08 -0500 Subject: [PATCH 29/30] Run black on test and improve check. --- tools/schemacode/schemacode/tests/test_schema.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/tools/schemacode/schemacode/tests/test_schema.py b/tools/schemacode/schemacode/tests/test_schema.py index b4dbc83357..fd04249a48 100644 --- a/tools/schemacode/schemacode/tests/test_schema.py +++ b/tools/schemacode/schemacode/tests/test_schema.py @@ -105,7 +105,7 @@ def test_formats(schema_obj): "24:00:00", # should be 00:00:00 "00:60:00", # should be 01:00:00 "00:00:60", # should be 00:01:00 - "01:23", # lacks either hours or seconds + "01:23", # lacks either hours or seconds ], "unit": [], "stimuli_relative": [ @@ -123,7 +123,7 @@ def test_formats(schema_obj): "RRID:", # empty one ], "uri": [ - "ftp://" # lacks anything but protocol + "ftp://", # lacks anything but protocol ], "bids_uri": [], } @@ -132,4 +132,6 @@ def test_formats(schema_obj): search_pattern = f"^{pattern_format}$" search = re.compile(search_pattern) for test_string in test_list: - assert not bool(search.fullmatch(test_string)) + assert not bool( + search.fullmatch(test_string) + ), f"'{test_string}' should not be a valid match for the pattern '{search.pattern}'" From 4a0ad3f5ae862a9150c038885ec4f011b002dfd9 Mon Sep 17 00:00:00 2001 From: Taylor Salo Date: Thu, 3 Feb 2022 09:49:10 -0500 Subject: [PATCH 30/30] Comment out failing pattern, but add a note about it. --- tools/schemacode/schemacode/tests/test_schema.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/schemacode/schemacode/tests/test_schema.py b/tools/schemacode/schemacode/tests/test_schema.py index fd04249a48..9e169d0f1c 100644 --- a/tools/schemacode/schemacode/tests/test_schema.py +++ b/tools/schemacode/schemacode/tests/test_schema.py @@ -123,7 +123,7 @@ def test_formats(schema_obj): "RRID:", # empty one ], "uri": [ - "ftp://", # lacks anything but protocol + # "ftp://", # lacks anything but protocol. This should fail, but doesn't ATM. ], "bids_uri": [], }