From 21a48d02a8f783379fab0a11d346105e817fad4f Mon Sep 17 00:00:00 2001 From: Ethan Steinberg Date: Mon, 29 Jul 2024 19:49:47 -0700 Subject: [PATCH] Update schema.py --- src/meds/schema.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/src/meds/schema.py b/src/meds/schema.py index 68d7fc0..4a9403e 100644 --- a/src/meds/schema.py +++ b/src/meds/schema.py @@ -18,12 +18,15 @@ # - data/ # A (possibly nested) folder containing multiple parquet files containing patient event data following the events_schema folder. # glob("data/**/*.parquet") is the recommended way for obtaining all patient event files. -# - dataset_metadata.json -# Dataset level metadata containing information about the ETL used, data version, etc -# - (Optional) code_metadata.parquet -# Code level metadata containing information about the code descriptions, standard mappings, etc -# - (Optional) patient_split.csv -# A specification of patient splits that should be used. +# - metadata/ +# A (possibly nested) folder containing metadata for the dataset. Any files within this folder are optional, but we also specify the format of 3 particularly +# important metadata files. +# * dataset_metadata.json +# Dataset level metadata containing information about the ETL used, data version, etc. +# * (Optional) code_metadata.parquet +# Code level metadata containing information about the code descriptions, standard mappings, etc. +# * (Optional) patient_split.csv +# A specification of patient splits that should be used. ############################################################