Skip to content

Commit

Permalink
Remove root directory + comment on units
Browse files: browse the repository at this point in the history
  • Loading branch information
EthanSteinberg authored Aug 29, 2024
1 parent 5bcbb38 commit f312a50
Showing 1 changed file with 7 additions and 5 deletions.
12 changes: 7 additions & 5 deletions src/meds/schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,10 @@
numeric_value_field = "numeric_value"

subject_id_dtype = pa.int64()

# The time datatype must use microsecond ("us") units to match the internal resolution of datetime.datetime
time_dtype = pa.timestamp("us")

code_dtype = pa.string()
numeric_value_dtype = pa.float32()

Expand Down Expand Up @@ -60,13 +63,12 @@ def data_schema(custom_properties=[]):
# case for them please add a GitHub issue.

prediction_time_field = "prediction_time"
prediction_time_dtype = pa.timestamp("us")

label_schema = pa.schema(
[
(subject_id_field, subject_id_dtype),
# The subject who is being labeled.
(prediction_time_field, prediction_time_dtype),
(prediction_time_field, time_dtype),
# The time the prediction is made.
# Machine learning models are allowed to use features that have timestamps less than or equal
# to this timestamp.
Expand Down Expand Up @@ -98,7 +100,7 @@ def data_schema(custom_properties=[]):

# The subject split schema.

subject_splits_filepath = "metadata/subject_splits.parquet"
subject_splits_filepath = "subject_splits.parquet"

train_split = "train" # For ML training.
tuning_split = "tuning" # For ML hyperparameter tuning. Also often called "validation" or "dev".
Expand All @@ -116,7 +118,7 @@ def data_schema(custom_properties=[]):
# The dataset metadata schema.
# This is a JSON schema.

dataset_metadata_filepath = "metadata/dataset.json"
dataset_metadata_filepath = "dataset.json"

dataset_metadata_schema = {
"type": "object",
Expand Down Expand Up @@ -150,7 +152,7 @@ def data_schema(custom_properties=[]):
# The code metadata schema.
# This is a parquet schema.

code_metadata_filepath = "metadata/codes.parquet"
code_metadata_filepath = "codes.parquet"

description_field = "description"
description_dtype = pa.string()
Expand Down

0 comments on commit f312a50

Please sign in to comment.