From abcaf29c9662259bba2f7a90fd84351f1cffc062 Mon Sep 17 00:00:00 2001 From: Mohamed Attia Date: Mon, 5 Aug 2024 16:37:26 +0200 Subject: [PATCH 1/3] Drop extra records when merging different `TimeseriesData` instances. --- src/enlyze/api_clients/timeseries/models.py | 13 +++-- .../api_clients/timeseries/test_models.py | 58 +++++++++++++------ 2 files changed, 47 insertions(+), 24 deletions(-) diff --git a/src/enlyze/api_clients/timeseries/models.py b/src/enlyze/api_clients/timeseries/models.py index 59531db..632657f 100644 --- a/src/enlyze/api_clients/timeseries/models.py +++ b/src/enlyze/api_clients/timeseries/models.py @@ -77,16 +77,19 @@ def extend(self, other: "TimeseriesData") -> None: def merge(self, other: "TimeseriesData") -> "TimeseriesData": """Merge records from ``other`` into the existing records.""" - if not (slen := len(self.records)) == (olen := len(other.records)): + slen, olen = len(self.records), len(other.records) + if olen < slen: raise ValueError( - "Cannot merge. Number of records in both instances has to be the same," - f" trying to merge an instance with {olen} records into an instance" - f" with {slen} records." + "Cannot merge. Attempted to merge" + f" an instance with {olen} records into an instance with {slen}" + " records. The instance to merge must have a number" + " of records greater than or equal to the number of records of" + " the instance you're trying to merge into." ) self.columns.extend(other.columns[1:]) - for s, o in zip(self.records, other.records): + for s, o in zip(self.records, other.records[:slen]): if s[0] != o[0]: raise ValueError( "Cannot merge. Attempted to merge records " diff --git a/tests/enlyze/api_clients/timeseries/test_models.py b/tests/enlyze/api_clients/timeseries/test_models.py index 0fbc0a0..ec16c41 100644 --- a/tests/enlyze/api_clients/timeseries/test_models.py +++ b/tests/enlyze/api_clients/timeseries/test_models.py @@ -37,40 +37,60 @@ def timeseries_data_2(timestamp): ) +@pytest.fixture +def timeseries_data_3(timestamp): + return TimeseriesData( + columns=["time", "var3"], + records=[ + [timestamp.isoformat(), 5], + [(timestamp - timedelta(minutes=10)).isoformat(), 6], + [(timestamp - timedelta(minutes=10)).isoformat(), 7], + [(timestamp - timedelta(minutes=10)).isoformat(), 8], + ], + ) + + class TestTimeseriesData: - def test_merge(self, timeseries_data_1, timeseries_data_2): - timeseries_data_1_records_len = len(timeseries_data_1.records) - timeseries_data_1_columns_len = len(timeseries_data_1.columns) - timeseries_data_2_records_len = len(timeseries_data_2.records) - timeseries_data_2_columns_len = len(timeseries_data_2.columns) - expected_merged_record_len = len(timeseries_data_1.records[0]) + len( - timeseries_data_2.records[0][TIMESTAMP_OFFSET:] + @pytest.mark.parametrize( + "data_fixture,data_to_merge_fixture", + [ + ("timeseries_data_1", "timeseries_data_2"), + ("timeseries_data_1", "timeseries_data_3"), + ], + ) + def test_merge(self, request, data_fixture, data_to_merge_fixture): + data = request.getfixturevalue(data_fixture) + data_to_merge = request.getfixturevalue(data_to_merge_fixture) + data_records_len = len(data.records) + data_columns_len = len(data.columns) + data_to_merge_columns_len = len(data_to_merge.columns) + expected_merged_record_len = len(data.records[0]) + len( + data_to_merge.records[0][TIMESTAMP_OFFSET:] ) - merged = timeseries_data_1.merge(timeseries_data_2) + merged = data.merge(data_to_merge) - assert merged is timeseries_data_1 - assert ( - len(merged.records) - == timeseries_data_1_records_len - == timeseries_data_2_records_len - ) + assert merged is data + assert len(merged.records) == data_records_len assert ( len(merged.columns) - == timeseries_data_1_columns_len - + timeseries_data_2_columns_len - - TIMESTAMP_OFFSET + == data_columns_len + data_to_merge_columns_len - TIMESTAMP_OFFSET ) for r in merged.records: assert len(r) == expected_merged_record_len == len(merged.columns) - def test_merge_raises_number_of_records_mismatch( + def test_merge_raises_number_of_records_to_merge_less_than_existing( self, timeseries_data_1, timeseries_data_2 ): timeseries_data_2.records = timeseries_data_2.records[1:] with pytest.raises( - ValueError, match="Number of records in both instances has to be the same" + ValueError, + match=( + "The instance to merge must have a number of" + " records greater than or equal to the number" + " of records of the instance you're trying to merge into." + ), ): timeseries_data_1.merge(timeseries_data_2) From e302baddfbff549b4f3bf5dcfbbc29fcd142a15b Mon Sep 17 00:00:00 2001 From: Mohamed Attia Date: Mon, 5 Aug 2024 16:42:29 +0200 Subject: [PATCH 2/3] Format `pyproject.toml` with `pyproject-fmt`. --- pyproject.toml | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index e3a81d6..9aafb9d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,15 +4,12 @@ requires = [ "setuptools", ] -[tool.setuptools.dynamic] -version = {attr = "enlyze._version.VERSION"} - [project] name = "enlyze" description = "Python SDK for interacting with the ENLYZE platform https://www.enlyze.com" readme = "README.rst" -license = {text = "MIT"} -authors = [{name = "ENLYZE GmbH", email = "hello@enlyze.com"},] +license = { text = "MIT" } +authors = [ { name = "ENLYZE GmbH", email = "hello@enlyze.com" } ] requires-python = ">=3.10" classifiers = [ "Programming Language :: Python :: 3 :: Only", @@ -21,21 +18,20 @@ classifiers = [ "Programming Language :: Python :: 3.12", ] dynamic = [ - 'version', + "version", ] dependencies = [ "httpx", "pandas>=2", "pydantic>=2", ] -[project.optional-dependencies] -docs = [ +optional-dependencies.docs = [ "sphinx", "sphinx-rtd-theme", "sphinx-tabs", "sphinxcontrib-spelling", ] -lint = [ +optional-dependencies.lint = [ "bandit", "black", "flake8", @@ -45,7 +41,7 @@ lint = [ "safety", "tox-ini-fmt", ] -test = [ +optional-dependencies.test = [ "coverage", "hypothesis", "pandas-stubs", @@ -57,8 +53,11 @@ test = [ "respx", ] -[tool.mypy] -exclude="tests" +[tool.setuptools.dynamic] +version = { attr = "enlyze._version.VERSION" } [tool.isort] profile = "black" + +[tool.mypy] +exclude = "tests" From d4c3835efdfe81a051bbfbdbbb157152cb2e1f68 Mon Sep 17 00:00:00 2001 From: Mohamed Attia Date: Tue, 6 Aug 2024 10:39:01 +0200 Subject: [PATCH 3/3] Refactor the tests for `TimeseriesData.merge()`. --- .../api_clients/timeseries/test_models.py | 100 ++++++++++-------- 1 file changed, 55 insertions(+), 45 deletions(-) diff --git a/tests/enlyze/api_clients/timeseries/test_models.py b/tests/enlyze/api_clients/timeseries/test_models.py index ec16c41..7e6683c 100644 --- a/tests/enlyze/api_clients/timeseries/test_models.py +++ b/tests/enlyze/api_clients/timeseries/test_models.py @@ -1,66 +1,53 @@ +import itertools +import random from datetime import datetime, timedelta, timezone import pytest from enlyze.api_clients.timeseries.models import TimeseriesData -# We use this to skip columns that contain the timestamp assuming +# We use this to skip columns that contain the timestamp assuming # it starts at the beginning of the sequence. We also use it # when computing lengths to account for a timestamp column. TIMESTAMP_OFFSET = 1 +NOW = datetime.now(tz=timezone.utc) -@pytest.fixture -def timestamp(): - return datetime.now(tz=timezone.utc) +def _generate_timeseries_data(*, columns, number_of_records): + timeseries_columns = ["time"] + timeseries_columns.extend(columns) + counter = itertools.count(start=10) -@pytest.fixture -def timeseries_data_1(timestamp): return TimeseriesData( - columns=["time", "var1", "var2"], + columns=timeseries_columns, records=[ - [timestamp.isoformat(), 1, 2], - [(timestamp - timedelta(minutes=10)).isoformat(), 3, 4], - ], - ) - - -@pytest.fixture -def timeseries_data_2(timestamp): - return TimeseriesData( - columns=["time", "var3"], - records=[ - [timestamp.isoformat(), 5], - [(timestamp - timedelta(minutes=10)).isoformat(), 6], - ], - ) - - -@pytest.fixture -def timeseries_data_3(timestamp): - return TimeseriesData( - columns=["time", "var3"], - records=[ - [timestamp.isoformat(), 5], - [(timestamp - timedelta(minutes=10)).isoformat(), 6], - [(timestamp - timedelta(minutes=10)).isoformat(), 7], - [(timestamp - timedelta(minutes=10)).isoformat(), 8], + [ + (NOW - timedelta(minutes=next(counter))).isoformat(), + *[random.randint(1, 100) for _ in range(len(columns))], + ] + for _ in range(number_of_records) ], ) class TestTimeseriesData: @pytest.mark.parametrize( - "data_fixture,data_to_merge_fixture", + "data_parameters,data_to_merge_parameters", [ - ("timeseries_data_1", "timeseries_data_2"), - ("timeseries_data_1", "timeseries_data_3"), + ( + {"columns": ["var1", "var2"], "number_of_records": 1}, + {"columns": ["var3"], "number_of_records": 1}, + ), + ( + {"columns": ["var1", "var2"], "number_of_records": 1}, + {"columns": ["var3"], "number_of_records": 3}, + ), ], ) - def test_merge(self, request, data_fixture, data_to_merge_fixture): - data = request.getfixturevalue(data_fixture) - data_to_merge = request.getfixturevalue(data_to_merge_fixture) + def test_merge(self, data_parameters, data_to_merge_parameters): + data = _generate_timeseries_data(**data_parameters) + data_to_merge = _generate_timeseries_data(**data_to_merge_parameters) data_records_len = len(data.records) data_columns_len = len(data.columns) data_to_merge_columns_len = len(data_to_merge.columns) @@ -80,10 +67,21 @@ def test_merge(self, request, data_fixture, data_to_merge_fixture): for r in merged.records: assert len(r) == expected_merged_record_len == len(merged.columns) + @pytest.mark.parametrize( + "data_parameters,data_to_merge_parameters", + [ + ( + {"columns": ["var1", "var2"], "number_of_records": 2}, + {"columns": ["var3"], "number_of_records": 1}, + ), + ], + ) def test_merge_raises_number_of_records_to_merge_less_than_existing( - self, timeseries_data_1, timeseries_data_2 + self, data_parameters, data_to_merge_parameters ): - timeseries_data_2.records = timeseries_data_2.records[1:] + data = _generate_timeseries_data(**data_parameters) + data_to_merge = _generate_timeseries_data(**data_to_merge_parameters) + with pytest.raises( ValueError, match=( @@ -92,12 +90,24 @@ def test_merge_raises_number_of_records_to_merge_less_than_existing( " of records of the instance you're trying to merge into." ), ): - timeseries_data_1.merge(timeseries_data_2) + data.merge(data_to_merge) + @pytest.mark.parametrize( + "data_parameters,data_to_merge_parameters", + [ + ( + {"columns": ["var1", "var2"], "number_of_records": 1}, + {"columns": ["var3"], "number_of_records": 1}, + ), + ], + ) def test_merge_raises_mismatched_timestamps( - self, timeseries_data_1, timeseries_data_2, timestamp + self, data_parameters, data_to_merge_parameters ): - timeseries_data_2.records[0][0] = (timestamp - timedelta(days=1)).isoformat() + data = _generate_timeseries_data(**data_parameters) + data_to_merge = _generate_timeseries_data(**data_to_merge_parameters) + + data_to_merge.records[0][0] = (NOW - timedelta(days=1)).isoformat() with pytest.raises(ValueError, match="mismatched timestamps"): - timeseries_data_1.merge(timeseries_data_2) + data.merge(data_to_merge)