test(taps): Add benchmark test on map transforms via `pytest-benchmark` (#2011)

* add benchmark test on map transforms via pytest-benchmark

* add action to comment benchmark result

* remove action to comment benchmark results
haleemur authored Oct 10, 2023
1 parent a82a5da commit 7675942
Showing 5 changed files with 143 additions and 28 deletions.
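For context: the new test relies on the `benchmark` fixture provided by the pytest-benchmark plugin. A minimal sketch of that fixture pattern (illustrative only, not code from this commit; the actual benchmark is in tests/core/test_mapper.py below):

def test_bench_example(benchmark):
    # The fixture calls the target repeatedly and records timing statistics.
    def work():
        return sum(range(10_000))

    result = benchmark(work)  # returns the target function's own return value
    assert result == sum(range(10_000))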
8 changes: 8 additions & 0 deletions .github/workflows/test.yml
@@ -52,6 +52,7 @@ jobs:
- { session: tests, python-version: "3.11", os: "ubuntu-latest", sqlalchemy: "1.*" }
- { session: doctest, python-version: "3.10", os: "ubuntu-latest", sqlalchemy: "2.*" }
- { session: mypy, python-version: "3.8", os: "ubuntu-latest", sqlalchemy: "2.*" }
- { session: benches, python-version: "3.10", os: "ubuntu-latest", sqlalchemy: "2.*" }

steps:
- name: Check out the repository
@@ -100,6 +101,13 @@ jobs:
name: coverage-data
path: ".coverage.*"

- name: Upload Benchmark Result
if: always() && (matrix.session == 'benches')
uses: actions/upload-artifact@v3
with:
name: benchmark_results
path: output.json

tests-external:
name: External Tests
runs-on: ubuntu-latest
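The `benchmark_results` artifact is simply the JSON report that pytest-benchmark writes via `--benchmark-json=output.json` (see the `benches` session in noxfile.py below). As a hedged sketch of how that report could be summarized locally, assuming pytest-benchmark's usual JSON layout (a top-level `benchmarks` list, each entry carrying a `stats` mapping):

import json

# Hypothetical post-processing helper, not part of this commit.
with open("output.json") as fp:
    report = json.load(fp)

for bench in report["benchmarks"]:
    stats = bench["stats"]
    print(f"{bench['name']}: mean={stats['mean']:.6f}s, stddev={stats['stddev']:.6f}s")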
23 changes: 23 additions & 0 deletions noxfile.py
@@ -34,6 +34,7 @@
nox.options.sessions = (
"mypy",
"tests",
"benches",
"doctest",
"test_cookiecutter",
)
@@ -44,6 +45,7 @@
"pytest",
"pytest-snapshot",
"pytest-durations",
"pytest-benchmark",
"pyarrow",
"requests-mock",
"time-machine",
@@ -104,13 +106,34 @@ def tests(session: Session) -> None:
"pytest",
"-v",
"--durations=10",
"--benchmark-skip",
*session.posargs,
)
finally:
if session.interactive:
session.notify("coverage", posargs=[])


@session(python=main_python_version)
def benches(session: Session) -> None:
"""Run benchmarks."""
session.install(".[s3]")
session.install(*test_dependencies)
sqlalchemy_version = os.environ.get("SQLALCHEMY_VERSION")
if sqlalchemy_version:
# Bypass nox-poetry use of --constraint so we can install a version of
# SQLAlchemy that doesn't match what's in poetry.lock.
session.poetry.session.install( # type: ignore[attr-defined]
f"sqlalchemy=={sqlalchemy_version}",
)
session.run(
"pytest",
"--benchmark-only",
"--benchmark-json=output.json",
*session.posargs,
)


@session(python=main_python_version)
def update_snapshots(session: Session) -> None:
"""Update pytest snapshots."""
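Usage note (an assumption about local workflow, not part of the diff): the new session can be run with `nox -s benches`, and arguments after `--`, e.g. `nox -s benches -- tests/core/test_mapper.py`, are forwarded to pytest through `session.posargs`, mirroring the existing `tests` session.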
56 changes: 41 additions & 15 deletions poetry.lock

Some generated files are not rendered by default.

1 change: 1 addition & 0 deletions pyproject.toml
@@ -118,6 +118,7 @@ types-requests = ">=2.28.11"
types-simplejson = ">=3.18.0"
types-PyYAML = ">=6.0.12"
xdoctest = ">=1.1.1"
pytest-benchmark = "^4.0.0"

[tool.black]
exclude = ".*simpleeval.*"
83 changes: 70 additions & 13 deletions tests/core/test_mapper.py
@@ -358,17 +358,15 @@ def test_filter_transforms_w_error(
)


def _test_transform(
test_name: str,
def _run_transform(
*,
stream_maps,
stream_map_config,
expected_result,
expected_schemas,
sample_stream,
sample_catalog_obj,
):
output: dict[str, list[dict]] = {}
output_schemas = {}
mapper = PluginMapper(
plugin_config={
"stream_maps": stream_maps,
@@ -383,15 +381,7 @@ def _test_transform(
if isinstance(stream_map, RemoveRecordTransform):
logging.info("Skipping ignored stream '%s'", stream_name)
continue

assert (
expected_schemas[stream_map.stream_alias]
== stream_map.transformed_schema
), (
f"Failed '{test_name}' schema test. Generated schema was "
f"{json.dumps(stream_map.transformed_schema, indent=2)}"
)

output_schemas[stream_map.stream_alias] = stream_map.transformed_schema
output[stream_map.stream_alias] = []
for record in stream:
result = stream_map.transform(record)
@@ -400,6 +390,39 @@
continue

output[stream_map.stream_alias].append(result)
return output, output_schemas


def _test_transform(
test_name: str,
*,
stream_maps,
stream_map_config,
expected_result,
expected_schemas,
sample_stream,
sample_catalog_obj,
):
output, output_schemas = _run_transform(
stream_maps=stream_maps,
stream_map_config=stream_map_config,
sample_stream=sample_stream,
sample_catalog_obj=sample_catalog_obj,
)

assert set(expected_schemas.keys()) == set(output_schemas.keys()), (
f"Failed `{test_name}` schema test. "
f"'{set(expected_schemas.keys()) - set(output_schemas.keys())}' "
"schemas not found. "
f"'{set(output_schemas.keys()) - set(expected_schemas.keys())}' "
"schemas not expected. "
)
for expected_schema_name, expected_schema in expected_schemas.items():
output_schema = output_schemas[expected_schema_name]
assert expected_schema == output_schema, (
f"Failed '{test_name}' schema test. Generated schema was "
f"{json.dumps(output_schema, indent=2)}"
)

assert expected_result == output, (
f"Failed '{test_name}' record result test. "
@@ -665,3 +688,37 @@ def test_mapped_stream(

buf.seek(0)
snapshot.assert_match(buf.read(), snapshot_name)


def test_bench_simple_map_transforms(
benchmark,
sample_stream,
sample_catalog_dict,
transform_stream_maps,
stream_map_config,
):
"""Run benchmark tests using the "repositories" stream."""
stream_size_scale = 1000

repositories_catalog = {
"streams": [
x
for x in sample_catalog_dict["streams"]
if x["tap_stream_id"] == "repositories"
],
}

repositories_sample_stream = {
"repositories": sample_stream["repositories"] * stream_size_scale,
}
repositories_transform_stream_maps = {
"repositories": transform_stream_maps["repositories"],
}
repositories_sample_catalog_obj = Catalog.from_dict(repositories_catalog)
benchmark(
_run_transform,
stream_maps=repositories_transform_stream_maps,
stream_map_config=stream_map_config,
sample_stream=repositories_sample_stream,
sample_catalog_obj=repositories_sample_catalog_obj,
)
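Here the `benchmark` fixture times repeated calls to `_run_transform` against a sample stream inflated 1000×, presumably so the measurement is dominated by per-record map-transform cost rather than setup. Locally, this benchmark alone could be run with something like `pytest tests/core/test_mapper.py --benchmark-only` (assuming pytest-benchmark is installed), while the regular `tests` session now skips it via `--benchmark-skip`.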
