From 2a2ad6c0379d3aa9889d81ae5d38a8327eb4e7c4 Mon Sep 17 00:00:00 2001 From: Ethan Steinberg Date: Fri, 16 Aug 2024 05:55:33 -0700 Subject: [PATCH] Fixes --- README.md | 13 +++++++++++-- src/meds/__init__.py | 8 ++++++++ 2 files changed, 19 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 4911e42..5800383 100644 --- a/README.md +++ b/README.md @@ -160,7 +160,7 @@ dataset_metadata = { "etl_name": {"type": "string"}, "etl_version": {"type": "string"}, "meds_version": {"type": "string"}, - "created_at": {"type": "string"}, + "created_at": {"type": "string"}, # Should be ISO 8601 }, } @@ -174,7 +174,7 @@ DatasetMetadata = TypedDict( "etl_name": NotRequired[str], "etl_version": NotRequired[str], "meds_version": NotRequired[str], - "created_at": NotRequired[str], + "created_at": NotRequired[str], # Should be ISO 8601 }, total=False, ) @@ -183,12 +183,21 @@ DatasetMetadata = TypedDict( #### The code metadata schema. ```python +# Code metadata must contain at least one row for every unique code in the dataset def code_metadata(custom_per_code_properties=[]): return pa.schema( [ ("code", pa.string()), + ("description", pa.string()), + ("parent_codes", pa.list(pa.string()), + # parent_codes must be a list of strings, each string being a higher-level + # code that represents a generalization of the provided code. Parent codes + # can use any structure, but it is recommended that they reference OMOP concepts + # whenever possible, to enable use of more generic labeling functions and OHDSI tools. + # OMOP concepts are referenced in these strings via the format "$VOCABULARY_NAME/$CONCEPT_NAME". 
+ # For example: "ICD9CM/487.0" would be a reference to ICD9 code 487.0 ] + custom_per_code_properties ) diff --git a/src/meds/__init__.py b/src/meds/__init__.py index 8f5f392..9cde9d7 100644 --- a/src/meds/__init__.py +++ b/src/meds/__init__.py @@ -5,13 +5,17 @@ DatasetMetadata, Label, birth_code, + code_field, code_metadata_schema, data_schema, dataset_metadata_schema, death_code, held_out_split, label_schema, + subject_id_dtype, + subject_id_field, subject_split_schema, + time_field, train_split, tuning_split, ) @@ -31,6 +35,10 @@ "DatasetMetadata": DatasetMetadata, "birth_code": birth_code, "death_code": death_code, + "subject_id_field": subject_id_field, + "time_field": time_field, + "code_field": code_field, + "subject_id_dtype": subject_id_dtype, } __all__ = list(_exported_objects.keys())