Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

test: Benchmarks for record datetime parsing #2054

60 changes: 60 additions & 0 deletions tests/core/sinks/test_validation.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
from __future__ import annotations

import datetime
import itertools

import pytest

from tests.conftest import BatchSinkMock, TargetMock

@@ -39,3 +42,60 @@ def test_validate_record():
)
assert updated_record["missing_datetime"] == "2021-01-01T00:00:00+00:00"
assert updated_record["invalid_datetime"] == "9999-12-31 23:59:59.999999"


@pytest.fixture
def bench_sink() -> BatchSinkMock:
    """Build the sink that the datetime-parsing benchmarks run against.

    The sink is a ``BatchSinkMock`` attached to a fresh ``TargetMock`` for a
    stream named ``users``. Its schema declares an integer ``id`` plus three
    string properties using the ``date-time`` format.
    """
    users_schema = {
        "type": "object",
        "properties": {
            "id": {"type": "integer"},
            "created_at": {"type": "string", "format": "date-time"},
            "updated_at": {"type": "string", "format": "date-time"},
            "deleted_at": {"type": "string", "format": "date-time"},
        },
    }
    # Fourth positional argument of the sink constructor; presumably the
    # stream's key properties -- confirm against BatchSinkMock's signature.
    key_fields = ["id"]
    return BatchSinkMock(TargetMock(), "users", users_schema, key_fields)


@pytest.fixture
def bench_record():
    """Provide the sample record fed to the benchmarks.

    ``created_at`` and ``updated_at`` carry a ``-07:00`` UTC offset, while
    ``deleted_at`` has fractional seconds and no offset.
    """
    timestamps = {
        "created_at": "2021-01-01T00:08:00-07:00",
        "updated_at": "2022-01-02T00:09:00-07:00",
        "deleted_at": "2023-01-03T00:10:00.0000",
    }
    # ``id`` goes first so the key order matches the schema's property order.
    return {"id": 1, **timestamps}


def test_bench_parse_timestamps_in_record(benchmark, bench_sink, bench_record):
    """Benchmark the sink's ``_parse_timestamps_in_record`` method.

    One benchmarked call processes the sample record 10000 times.
    """
    iterations = 10000

    def parse_batch():
        # A fresh shallow copy is passed on every iteration, so the fixture
        # record itself is never handed to the sink.
        for template in itertools.repeat(bench_record, iterations):
            bench_sink._parse_timestamps_in_record(
                template.copy(),
                bench_sink.schema,
                bench_sink.datetime_error_treatment,
            )

    benchmark(parse_batch)


def test_bench_validate_and_parse(benchmark, bench_sink, bench_record):
    """Benchmark the sink's ``_validate_and_parse`` method.

    One benchmarked call processes the sample record 10000 times.
    """
    iterations = 10000

    def validate_batch():
        # A fresh shallow copy is passed on every iteration, so the fixture
        # record itself is never handed to the sink.
        for template in itertools.repeat(bench_record, iterations):
            bench_sink._validate_and_parse(template.copy())

    benchmark(validate_batch)