From 2a2ad6c0379d3aa9889d81ae5d38a8327eb4e7c4 Mon Sep 17 00:00:00 2001
From: Ethan Steinberg <ethan.steinberg@gmail.com>
Date: Fri, 16 Aug 2024 05:55:33 -0700
Subject: [PATCH] Document metadata conventions in README; export field name constants

---
 README.md            | 13 +++++++++++--
 src/meds/__init__.py |  8 ++++++++
 2 files changed, 19 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index 4911e42..5800383 100644
--- a/README.md
+++ b/README.md
@@ -160,7 +160,7 @@ dataset_metadata = {
         "etl_name": {"type": "string"},
         "etl_version": {"type": "string"},
         "meds_version": {"type": "string"},
-        "created_at": {"type": "string"},
+        "created_at": {"type": "string"},  # Should be ISO 8601
     },
 }
 
@@ -174,7 +174,7 @@ DatasetMetadata = TypedDict(
         "etl_name": NotRequired[str],
         "etl_version": NotRequired[str],
         "meds_version": NotRequired[str],
-        "created_at": NotRequired[str],
+        "created_at": NotRequired[str],  # Should be ISO 8601
     },
     total=False,
 )
@@ -183,12 +183,21 @@ DatasetMetadata = TypedDict(
 #### The code metadata schema.
 
 ```python
+# Code metadata must contain at least one row for every unique code in the dataset
 def code_metadata(custom_per_code_properties=[]):
     return pa.schema(
         [
             ("code", pa.string()),
+
             ("description", pa.string()),
+
             ("parent_codes", pa.list(pa.string()),
+            # parent_codes must be a list of strings, each string being a higher level
+            # code that represents a generalization of the provided code. Parent codes
+            # can use any structure, but it is recommended that they reference OMOP concepts
+            # whenever possible, to enable use of more generic labeling functions and OHDSI tools.
+            # OMOP concepts are referenced in these strings via the format "$VOCABULARY_NAME/$CONCEPT_NAME".
+            # For example: "ICD9CM/487.0" would be a reference to ICD9 code 487.0
         ] + custom_per_code_properties
     )
 
diff --git a/src/meds/__init__.py b/src/meds/__init__.py
index 8f5f392..9cde9d7 100644
--- a/src/meds/__init__.py
+++ b/src/meds/__init__.py
@@ -5,13 +5,17 @@
     DatasetMetadata,
     Label,
     birth_code,
+    code_field,
     code_metadata_schema,
     data_schema,
     dataset_metadata_schema,
     death_code,
     held_out_split,
     label_schema,
+    subject_id_dtype,
+    subject_id_field,
     subject_split_schema,
+    time_field,
     train_split,
     tuning_split,
 )
@@ -31,6 +35,10 @@
     "DatasetMetadata": DatasetMetadata,
     "birth_code": birth_code,
     "death_code": death_code,
+    "subject_id_field": subject_id_field,
+    "time_field": time_field,
+    "code_field": code_field,
+    "subject_id_dtype": subject_id_dtype,
 }
 
 __all__ = list(_exported_objects.keys())