From ae6d26992620b13e0f50f6adf86929b61753ddf4 Mon Sep 17 00:00:00 2001
From: Matthew McDermott <mattmcdermott8@gmail.com>
Date: Tue, 30 Jul 2024 15:19:17 -0400
Subject: [PATCH] Adjust naming convention to minimize import and variable name
 conflicts.

---
 src/meds/__init__.py | 14 +++++++-------
 src/meds/schema.py   | 10 +++++-----
 tests/test_schema.py | 29 +++++++++++++++--------------
 3 files changed, 27 insertions(+), 26 deletions(-)

diff --git a/src/meds/__init__.py b/src/meds/__init__.py
index 8853647..3d8d36c 100644
--- a/src/meds/__init__.py
+++ b/src/meds/__init__.py
@@ -1,22 +1,22 @@
 from meds._version import __version__  # noqa
 
 from .schema import (
-    data, label, Label, train_split, tuning_split, held_out_split, patient_split, code_metadata,
-    dataset_metadata, CodeMetadata, DatasetMetadata, birth_code, death_code
+    data_schema, label_schema, Label, train_split, tuning_split, held_out_split, patient_split_schema,
+    code_metadata_schema, dataset_metadata_schema, CodeMetadata, DatasetMetadata, birth_code, death_code
 )
 
 
 # List all objects that we want to export
 _exported_objects = {
-    'data': data,
-    'label': label,
+    'data_schema': data_schema,
+    'label_schema': label_schema,
     'Label': Label,
     'train_split': train_split,
     'tuning_split': tuning_split,
     'held_out_split': held_out_split,
-    'patient_split': patient_split,
-    'code_metadata': code_metadata,
-    'dataset_metadata': dataset_metadata,
+    'patient_split_schema': patient_split_schema,
+    'code_metadata_schema': code_metadata_schema,
+    'dataset_metadata_schema': dataset_metadata_schema,
     'CodeMetadata': CodeMetadata,
     'DatasetMetadata': DatasetMetadata,
     'birth_code': birth_code,
diff --git a/src/meds/schema.py b/src/meds/schema.py
index a7b67b4..5b263f4 100644
--- a/src/meds/schema.py
+++ b/src/meds/schema.py
@@ -26,7 +26,7 @@
 birth_code = "MEDS_BIRTH"
 death_code = "MEDS_DEATH"
 
-def data(custom_properties=[]):
+def data_schema(custom_properties=[]):
     return pa.schema(
         [
             ("patient_id", pa.int64()),
@@ -45,7 +45,7 @@ def data(custom_properties=[]):
 # including the prediction time. Exclusive prediction times are not currently supported, but if you have a use
 # case for them please add a GitHub issue.
 
-label = pa.schema(
+label_schema = pa.schema(
     [
         ("patient_id", pa.int64()),
          # The patient who is being labeled.
@@ -83,7 +83,7 @@ def data(custom_properties=[]):
 tuning_split = "tuning" # For ML hyperparameter tuning. Also often called "validation" or "dev".
 held_out_split = "held_out" # For final ML evaluation. Also often called "test".
 
-patient_split = pa.schema(
+patient_split_schema = pa.schema(
     [
         ("patient_id", pa.int64()),
         ("split", pa.string()),
@@ -96,7 +96,7 @@ def data(custom_properties=[]):
 # This is a JSON schema.
 
 
-dataset_metadata = {
+dataset_metadata_schema = {
     "type": "object",
     "properties": {
         "dataset_name": {"type": "string"},
@@ -126,7 +126,7 @@ def data(custom_properties=[]):
 # The code metadata schema.
 # This is a parquet schema.
 
-def code_metadata(custom_per_code_properties=[]): 
+def code_metadata_schema(custom_per_code_properties=[]): 
     return pa.schema(
         [
             ("code", pa.string()),
diff --git a/tests/test_schema.py b/tests/test_schema.py
index 168c4e9..b945909 100644
--- a/tests/test_schema.py
+++ b/tests/test_schema.py
@@ -5,7 +5,8 @@
 import pytest
 
 from meds import (
-    data, label, dataset_metadata, patient_split, code_metadata, train_split, tuning_split, held_out_split
+    data_schema, label_schema, dataset_metadata_schema, patient_split_schema, code_metadata_schema,
+    train_split, tuning_split, held_out_split
 )
 
 def test_data_schema():
@@ -23,7 +24,7 @@ def test_data_schema():
         }
     ]
 
-    schema = data([("text_value", pa.string())])
+    schema = data_schema([("text_value", pa.string())])
 
     table = pa.Table.from_pylist(raw_data, schema=schema)
     assert table.schema.equals(schema), "Patient schema does not match"
@@ -41,7 +42,7 @@ def test_code_metadata_schema():
         }
     ]
 
-    schema = code_metadata()
+    schema = code_metadata_schema()
 
     table = pa.Table.from_pylist(code_metadata, schema=schema)
     assert table.schema.equals(schema), "Code metadata schema does not match"
@@ -58,8 +59,8 @@ def test_patient_split_schema():
         {"patient_id": 123, "split": "special"},
     ]
 
-    table = pa.Table.from_pylist(patient_split_data, schema=patient_split)
-    assert table.schema.equals(patient_split), "Patient split schema does not match"
+    table = pa.Table.from_pylist(patient_split_data, schema=patient_split_schema)
+    assert table.schema.equals(patient_split_schema), "Patient split schema does not match"
 
 def test_label_schema():
     """
@@ -73,8 +74,8 @@ def test_label_schema():
             "boolean_value": True
         }
     ]
-    label_table = pa.Table.from_pylist(label_data, schema=label)
-    assert label_table.schema.equals(label), "Label schema does not match"
+    label_table = pa.Table.from_pylist(label_data, schema=label_schema)
+    assert label_table.schema.equals(label_schema), "Label schema does not match"
 
     label_data = [
         {
@@ -83,8 +84,8 @@ def test_label_schema():
             "integer_value": 4
         }
     ]
-    label_table = pa.Table.from_pylist(label_data, schema=label)
-    assert label_table.schema.equals(label), "Label schema does not match"
+    label_table = pa.Table.from_pylist(label_data, schema=label_schema)
+    assert label_table.schema.equals(label_schema), "Label schema does not match"
     
     label_data = [
         {
@@ -93,8 +94,8 @@ def test_label_schema():
             "float_value": 0.4
         }
     ]
-    label_table = pa.Table.from_pylist(label_data, schema=label)
-    assert label_table.schema.equals(label), "Label schema does not match"
+    label_table = pa.Table.from_pylist(label_data, schema=label_schema)
+    assert label_table.schema.equals(label_schema), "Label schema does not match"
     
     label_data = [
         {
@@ -103,8 +104,8 @@ def test_label_schema():
             "categorical_value": "text"
         }
     ]
-    label_table = pa.Table.from_pylist(label_data, schema=label)
-    assert label_table.schema.equals(label), "Label schema does not match"
+    label_table = pa.Table.from_pylist(label_data, schema=label_schema)
+    assert label_table.schema.equals(label_schema), "Label schema does not match"
 
 def test_dataset_metadata_schema():
     """
@@ -117,5 +118,5 @@ def test_dataset_metadata_schema():
         "etl_version": "1.0",
     }
 
-    jsonschema.validate(instance=metadata, schema=dataset_metadata)
+    jsonschema.validate(instance=metadata, schema=dataset_metadata_schema)
     assert True, "Dataset metadata schema validation failed"