test(taps): Add benchmark test on map transforms via `pytest-benchmark` (#2011)

* add benchmark test on map transforms via pytest-benchmark

* add action to comment benchmark result

* remove action to comment benchmark results
haleemur authored Oct 10, 2023
1 parent a82a5da commit 7675942
Showing 5 changed files with 143 additions and 28 deletions.
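For context: the new test relies on the `benchmark` fixture provided by the pytest-benchmark plugin. A minimal sketch of that fixture pattern (illustrative only, not code from this commit; the actual benchmark is in tests/core/test_mapper.py below):

def test_bench_example(benchmark):
    # The fixture calls the target repeatedly and records timing statistics.
    def work():
        return sum(range(10_000))

    result = benchmark(work)  # returns the target function's own return value
    assert result == sum(range(10_000))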
8 changes: 8 additions & 0 deletions .github/workflows/test.yml
@@ -52,6 +52,7 @@ jobs:
- { session: tests, python-version: "3.11", os: "ubuntu-latest", sqlalchemy: "1.*" }
- { session: doctest, python-version: "3.10", os: "ubuntu-latest", sqlalchemy: "2.*" }
- { session: mypy, python-version: "3.8", os: "ubuntu-latest", sqlalchemy: "2.*" }
- { session: benches, python-version: "3.10", os: "ubuntu-latest", sqlalchemy: "2.*" }

steps:
- name: Check out the repository
@@ -100,6 +101,13 @@ jobs:
name: coverage-data
path: ".coverage.*"

- name: Upload Benchmark Result
if: always() && (matrix.session == 'benches')
uses: actions/upload-artifact@v3
with:
name: benchmark_results
path: output.json

tests-external:
name: External Tests
runs-on: ubuntu-latest
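The `benchmark_results` artifact is simply the JSON report that pytest-benchmark writes via `--benchmark-json=output.json` (see the `benches` session in noxfile.py below). As a hedged sketch of how that report could be summarized locally, assuming pytest-benchmark's usual JSON layout (a top-level `benchmarks` list, each entry carrying a `stats` mapping):

import json

# Hypothetical post-processing helper, not part of this commit.
with open("output.json") as fp:
    report = json.load(fp)

for bench in report["benchmarks"]:
    stats = bench["stats"]
    print(f"{bench['name']}: mean={stats['mean']:.6f}s, stddev={stats['stddev']:.6f}s")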
23 changes: 23 additions & 0 deletions noxfile.py
@@ -34,6 +34,7 @@
nox.options.sessions = (
"mypy",
"tests",
"benches",
"doctest",
"test_cookiecutter",
)
@@ -44,6 +45,7 @@
"pytest",
"pytest-snapshot",
"pytest-durations",
"pytest-benchmark",
"pyarrow",
"requests-mock",
"time-machine",
@@ -104,13 +106,34 @@ def tests(session: Session) -> None:
"pytest",
"-v",
"--durations=10",
"--benchmark-skip",
*session.posargs,
)
finally:
if session.interactive:
session.notify("coverage", posargs=[])


@session(python=main_python_version)
def benches(session: Session) -> None:
"""Run benchmarks."""
session.install(".[s3]")
session.install(*test_dependencies)
sqlalchemy_version = os.environ.get("SQLALCHEMY_VERSION")
if sqlalchemy_version:
# Bypass nox-poetry use of --constraint so we can install a version of
# SQLAlchemy that doesn't match what's in poetry.lock.
session.poetry.session.install( # type: ignore[attr-defined]
f"sqlalchemy=={sqlalchemy_version}",
)
session.run(
"pytest",
"--benchmark-only",
"--benchmark-json=output.json",
*session.posargs,
)


@session(python=main_python_version)
def update_snapshots(session: Session) -> None:
"""Update pytest snapshots."""
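Usage note (an assumption about local workflow, not part of the diff): the new session can be run with `nox -s benches`, and arguments after `--`, e.g. `nox -s benches -- tests/core/test_mapper.py`, are forwarded to pytest through `session.posargs`, mirroring the existing `tests` session.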
56 changes: 41 additions & 15 deletions poetry.lock

Some generated files are not rendered by default.

1 change: 1 addition & 0 deletions pyproject.toml
@@ -118,6 +118,7 @@ types-requests = ">=2.28.11"
types-simplejson = ">=3.18.0"
types-PyYAML = ">=6.0.12"
xdoctest = ">=1.1.1"
pytest-benchmark = "^4.0.0"

[tool.black]
exclude = ".*simpleeval.*"
83 changes: 70 additions & 13 deletions tests/core/test_mapper.py
@@ -358,17 +358,15 @@ def test_filter_transforms_w_error(
)


def _test_transform(
test_name: str,
def _run_transform(
*,
stream_maps,
stream_map_config,
expected_result,
expected_schemas,
sample_stream,
sample_catalog_obj,
):
output: dict[str, list[dict]] = {}
output_schemas = {}
mapper = PluginMapper(
plugin_config={
"stream_maps": stream_maps,
@@ -383,15 +381,7 @@ def _test_transform(
if isinstance(stream_map, RemoveRecordTransform):
logging.info("Skipping ignored stream '%s'", stream_name)
continue

assert (
expected_schemas[stream_map.stream_alias]
== stream_map.transformed_schema
), (
f"Failed '{test_name}' schema test. Generated schema was "
f"{json.dumps(stream_map.transformed_schema, indent=2)}"
)

output_schemas[stream_map.stream_alias] = stream_map.transformed_schema
output[stream_map.stream_alias] = []
for record in stream:
result = stream_map.transform(record)
@@ -400,6 +390,39 @@
continue

output[stream_map.stream_alias].append(result)
return output, output_schemas


def _test_transform(
test_name: str,
*,
stream_maps,
stream_map_config,
expected_result,
expected_schemas,
sample_stream,
sample_catalog_obj,
):
output, output_schemas = _run_transform(
stream_maps=stream_maps,
stream_map_config=stream_map_config,
sample_stream=sample_stream,
sample_catalog_obj=sample_catalog_obj,
)

assert set(expected_schemas.keys()) == set(output_schemas.keys()), (
f"Failed `{test_name}` schema test. "
f"'{set(expected_schemas.keys()) - set(output_schemas.keys())}' "
"schemas not found. "
f"'{set(output_schemas.keys()) - set(expected_schemas.keys())}' "
"schemas not expected. "
)
for expected_schema_name, expected_schema in expected_schemas.items():
output_schema = output_schemas[expected_schema_name]
assert expected_schema == output_schema, (
f"Failed '{test_name}' schema test. Generated schema was "
f"{json.dumps(output_schema, indent=2)}"
)

assert expected_result == output, (
f"Failed '{test_name}' record result test. "
@@ -665,3 +688,37 @@ def test_mapped_stream(

buf.seek(0)
snapshot.assert_match(buf.read(), snapshot_name)


def test_bench_simple_map_transforms(
benchmark,
sample_stream,
sample_catalog_dict,
transform_stream_maps,
stream_map_config,
):
"""Run benchmark tests using the "repositories" stream."""
stream_size_scale = 1000

repositories_catalog = {
"streams": [
x
for x in sample_catalog_dict["streams"]
if x["tap_stream_id"] == "repositories"
],
}

repositories_sample_stream = {
"repositories": sample_stream["repositories"] * stream_size_scale,
}
repositories_transform_stream_maps = {
"repositories": transform_stream_maps["repositories"],
}
repositories_sample_catalog_obj = Catalog.from_dict(repositories_catalog)
benchmark(
_run_transform,
stream_maps=repositories_transform_stream_maps,
stream_map_config=stream_map_config,
sample_stream=repositories_sample_stream,
sample_catalog_obj=repositories_sample_catalog_obj,
)
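Here the `benchmark` fixture times repeated calls to `_run_transform` against a sample stream inflated 1000×, presumably so the measurement is dominated by per-record map-transform cost rather than setup. Locally, this benchmark alone could be run with something like `pytest tests/core/test_mapper.py --benchmark-only` (assuming pytest-benchmark is installed), while the regular `tests` session now skips it via `--benchmark-skip`.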
