From 37c32f5c067f997f0e9f753f2fbcd1069e2a2896 Mon Sep 17 00:00:00 2001 From: Yaroslav Halchenko Date: Thu, 20 Jun 2024 00:16:10 -0400 Subject: [PATCH 1/2] Add the notion that example layout can in fact be a valid BIDS dataset This reverts commit a3c12f89bbca7a57f77832d146a808f6c6ca0194 where I have tried to introduce it in https://github.com/bids-standard/bids-specification/pull/1741 but it required a little more of further detailing. --- src/common-principles.md | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/common-principles.md b/src/common-principles.md index da87a35e06..a72cacf8f7 100644 --- a/src/common-principles.md +++ b/src/common-principles.md @@ -278,7 +278,7 @@ A guide for using macros can be found at --> {{ MACROS___make_filetree_example( { - "my_project-1": { + "my_dataset-1": { "sourcedata": { "dicoms": {}, "raw": { @@ -294,7 +294,9 @@ A guide for using macros can be found at "pipeline_1": {}, "pipeline_2": {}, "...": "", - } + }, + "dataset_description.json": "", + "...": "", } } ) }} @@ -303,7 +305,7 @@ In this example, `sourcedata/dicoms` is not nested inside `sourcedata/raw`, **and only the `sourcedata/raw` subdirectory** is a BIDS-compliant dataset among `sourcedata/` subfolders. The subdirectories of `derivatives` MAY be BIDS-compliant derivatives datasets (see [Non-compliant derivatives](#non-compliant-derivatives) for further discussion). -The above example is just a convention useful for organizing source, raw BIDS, and derived BIDS data while maintaining BIDS compliance of the raw data directory. +The above example is a fully compliant BIDS dataset, providing a convention useful for organizing source, raw BIDS, and derived BIDS data while maintaining overall BIDS compliance. 
When using this convention it is RECOMMENDED to set the `SourceDatasets` field in `dataset_description.json` of each subdirectory of `derivatives` to: From 2d9bfdf5b0b97eb896fb225b8e3f791c0c500daa Mon Sep 17 00:00:00 2001 From: Yaroslav Halchenko Date: Thu, 20 Jun 2024 03:57:23 -0400 Subject: [PATCH 2/2] Move and extend description and definition of DatasetType "project" --- src/common-principles.md | 92 ++++++++++++++-------------- src/schema/objects/enums.yaml | 5 ++ src/schema/objects/metadata.yaml | 1 + src/schema/rules/checks/dataset.yaml | 3 +- 4 files changed, 55 insertions(+), 46 deletions(-) diff --git a/src/common-principles.md b/src/common-principles.md index a72cacf8f7..e899cf8199 100644 --- a/src/common-principles.md +++ b/src/common-principles.md @@ -270,51 +270,6 @@ However, in the case that these data are to be included: We RECOMMEND including the PDF print-out with the actual sequence parameters generated by the scanner in the `sourcedata` directory. -Alternatively one can organize their data in the following way - - -{{ MACROS___make_filetree_example( - { - "my_dataset-1": { - "sourcedata": { - "dicoms": {}, - "raw": { - "sub-01": {}, - "sub-02": {}, - "...": "", - "dataset_description.json": "", - "...": "", - }, - "..." : "", - }, - "derivatives": { - "pipeline_1": {}, - "pipeline_2": {}, - "...": "", - }, - "dataset_description.json": "", - "...": "", - } - } -) }} - -In this example, `sourcedata/dicoms` is not nested inside -`sourcedata/raw`, **and only the `sourcedata/raw` subdirectory** is a BIDS-compliant dataset among `sourcedata/` subfolders. -The subdirectories of `derivatives` MAY be BIDS-compliant derivatives datasets -(see [Non-compliant derivatives](#non-compliant-derivatives) for further discussion). -The above example is a fully compliant BIDS dataset, providing a convention useful for organizing source, raw BIDS, and derived BIDS data while maintaining overall BIDS compliance. 
-When using this convention it is RECOMMENDED to set the `SourceDatasets` -field in `dataset_description.json` of each subdirectory of `derivatives` to: - -```JSON -{ - "SourceDatasets": [ {"URL": "../../sourcedata/raw/"} ] -} -``` - !!! danger "Caution" Sharing source data may help amend errors and missing data discovered @@ -438,6 +393,53 @@ In particular, if a BIDS dataset contains a `derivatives/` subdirectory, the contents of that directory may be a heterogeneous mix of BIDS Derivatives datasets and non-compliant derivatives. +## Project dataset + +BIDS allows one to organize the data for the entire project (original source data, raw BIDS, derivatives) as a valid BIDS dataset in the following way: + + +{{ MACROS___make_filetree_example( + { + "my_project-1": { + "sourcedata": { + "dicoms": {}, + "raw": { + "sub-01": {}, + "sub-02": {}, + "...": "", + "dataset_description.json": "", + "...": "", + }, + "..." : "", + }, + "derivatives": { + "pipeline_1": {}, + "pipeline_2": {}, + "...": "", + }, + "dataset_description.json": "", + "...": "", + } + } +) }} + +In this example, `sourcedata/dicoms` is not nested inside +`sourcedata/raw`, **and only the `sourcedata/raw` subdirectory** is a BIDS-compliant dataset among `sourcedata/` subfolders. +The subdirectories of `derivatives` MAY be BIDS-compliant derivatives datasets +(see [Non-compliant derivatives](#non-compliant-derivatives) for further discussion). +The above example is a fully compliant BIDS dataset, providing a convention useful for organizing source, raw BIDS, and derived BIDS data while maintaining overall BIDS compliance. +When using this convention, the top-level `dataset_description.json` MUST have `DatasetType` set to `"project"`. 
It is also RECOMMENDED to set the `SourceDatasets` +field in `dataset_description.json` of each subdirectory of `derivatives` to: + +```JSON +{ + "SourceDatasets": [ {"URL": "../../sourcedata/raw/"} ] +} +``` + ## File format specification ### Imaging files diff --git a/src/schema/objects/enums.yaml b/src/schema/objects/enums.yaml index 2f63006460..e04708b45c 100644 --- a/src/schema/objects/enums.yaml +++ b/src/schema/objects/enums.yaml @@ -1296,6 +1296,11 @@ derivative: display_name: derivative description: | A derived BIDS dataset. +project: + value: project + display_name: project + description: | + A project BIDS dataset, organizing the original source data, raw BIDS data, and derivatives of an entire project. balanced: value: balanced display_name: balanced diff --git a/src/schema/objects/metadata.yaml b/src/schema/objects/metadata.yaml index 70926904fc..0913647d17 100644 --- a/src/schema/objects/metadata.yaml +++ b/src/schema/objects/metadata.yaml @@ -553,6 +553,7 @@ DatasetType: enum: - $ref: objects.enums.raw.value - $ref: objects.enums.derivative.value + - $ref: objects.enums.project.value DecayCorrectionFactor: name: DecayCorrectionFactor display_name: Decay Correction Factor diff --git a/src/schema/rules/checks/dataset.yaml b/src/schema/rules/checks/dataset.yaml index 91704d32e4..50db7b506e 100644 --- a/src/schema/rules/checks/dataset.yaml +++ b/src/schema/rules/checks/dataset.yaml @@ -6,10 +6,11 @@ SubjectFolders: issue: code: SUBJECT_FOLDERS message: | - There are no subject directories (labeled "sub-*") in the root of this dataset. + There are no subject directories (labeled "sub-*") in the root of this raw BIDS dataset. level: error selectors: - path == '/dataset_description.json' + - dataset.dataset_description.DatasetType == "raw" checks: - length(dataset.subjects.sub_dirs) > 0