From a3b8e9325b46dfdebc55c4d1d156d23374f74596 Mon Sep 17 00:00:00 2001 From: Alex Parsons Date: Mon, 4 Sep 2023 14:01:19 +0000 Subject: [PATCH] Enum values get consistently sorted --- src/data_common/dataset/table_management.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/data_common/dataset/table_management.py b/src/data_common/dataset/table_management.py index c1a4a93..dbaf033 100644 --- a/src/data_common/dataset/table_management.py +++ b/src/data_common/dataset/table_management.py @@ -3,6 +3,7 @@ import pandas as pd from pandas.io.json import build_table_schema + from data_common.db import duck_query @@ -80,6 +81,9 @@ def enhance_field( field["constraints"]["enum"] = enum_value if isinstance(enum_value, EnumPlaceholder): field["constraints"]["enum"] = enum_value.process(col) + if isinstance(field["constraints"]["enum"], list): + # sort the enum values + field["constraints"]["enum"] = sorted(field["constraints"]["enum"]) return field @classmethod @@ -105,7 +109,6 @@ def get_table_schema( def update_table_schema( path: Path, existing_schema: SchemaValidator | None ) -> SchemaValidator: - if path.suffix == ".csv": df = pd.read_csv(path) elif path.suffix == ".parquet":