Skip to content

Commit

Permalink
Raise error when trying to resolve to display names with duplicates.
Browse files Browse the repository at this point in the history
  • Loading branch information
daniel-k committed Oct 1, 2024
1 parent c4907a0 commit 0b4200b
Show file tree
Hide file tree
Showing 3 changed files with 47 additions and 7 deletions.
8 changes: 8 additions & 0 deletions src/enlyze/errors.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,3 +28,11 @@ class ResamplingValidationError(EnlyzeError):
resampling interval is specified.
"""


class DuplicateDisplayNameError(EnlyzeError):
    """Variables with duplicate display names.

    Raised when resolving variable UUIDs to display names would result in
    ambiguity because multiple variables have the same display name. The
    exception message lists the affected display names.
    """
22 changes: 16 additions & 6 deletions src/enlyze/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@

import pandas

from enlyze.errors import DuplicateDisplayNameError
from enlyze.schema import dataframe_ensure_schema


Expand Down Expand Up @@ -143,6 +144,19 @@ def _display_names_as_column_names(self, columns: list[str]) -> list[str]:
if var.display_name
}

distinct_display_names = set(uuid_to_display_name.values())
if len(uuid_to_display_name) != len(distinct_display_names):
maybe_duplicate_display_names = list(uuid_to_display_name.values())
for name in distinct_display_names:
maybe_duplicate_display_names.remove(name)

raise DuplicateDisplayNameError(
", ".join(
f"'{duplicate_display_name}'"
for duplicate_display_name in set(maybe_duplicate_display_names)
)
)

return [uuid_to_display_name.get(var_uuid, var_uuid) for var_uuid in columns]

def to_dicts(self, use_display_names: bool = False) -> Iterator[dict[str, Any]]:
Expand All @@ -153,13 +167,11 @@ def to_dicts(self, use_display_names: bool = False) -> Iterator[dict[str, Any]]:
<python:datetime-naive-aware>` :py:class:`datetime.datetime` localized in UTC.
:param use_display_names: Whether to return display names instead of variable
UUIDs. If there is no display name, fall back to UUID. Display names aren't
guaranteed to be unique, duplicate columns will not be returned.
UUIDs. If there is no display name, fall back to UUID.
:returns: Iterator over rows
"""

time_column, *variable_columns = self._columns

if use_display_names:
Expand All @@ -182,9 +194,7 @@ def to_dataframe(self, use_display_names: bool = False) -> pandas.DataFrame:
represented as a column named by its UUID.
:param use_display_names: Whether to return display names instead of variable
UUIDs. If there is no display name, fall back to UUID. Display names aren't
guaranteed to be unique, so the DataFrame may contain multiple columns with
the same name.
UUIDs. If there is no display name, fall back to UUID.
:returns: DataFrame with timeseries data indexed by time
Expand Down
24 changes: 23 additions & 1 deletion tests/enlyze/test_models.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,13 @@
from dataclasses import replace
from datetime import datetime
from uuid import uuid4

import hypothesis.strategies as st
import pytest
from hypothesis import given

from enlyze.models import ProductionRun, ProductionRuns
from enlyze.errors import DuplicateDisplayNameError
from enlyze.models import ProductionRun, ProductionRuns, TimeseriesData, Variable


@given(runs=st.lists(st.from_type(ProductionRun), max_size=10))
Expand All @@ -28,3 +32,21 @@ def test_production_runs_to_dataframe_no_empty_columns_for_optional_dataclasses(

assert "quantity_total" not in df.columns
assert "average_throughput" in df.columns


@given(variable=st.builds(Variable, display_name=st.text(min_size=1)))
def test_timeseries_data_duplicate_display_names(variable):
    """Resolving display names must fail when two variables share one name."""
    # Copy the generated variable, changing only its UUID, so both entries
    # carry the same non-empty display name.
    twin = replace(variable, uuid=uuid4())
    pair = [variable, twin]

    # Column layout: the time column first, then one column per variable UUID.
    column_names = ["time"]
    column_names.extend(str(var.uuid) for var in pair)

    timeseries = TimeseriesData(
        start=datetime.now(),
        end=datetime.now(),
        variables=pair,
        _columns=column_names,
        _records=[],
    )

    with pytest.raises(DuplicateDisplayNameError):
        timeseries.to_dataframe(use_display_names=True)

0 comments on commit 0b4200b

Please sign in to comment.