From dcf38bb7f949d0397c5da0fc620d9168a09750f9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1s=20Far=C3=ADas=20Santana?= Date: Thu, 22 Aug 2024 15:37:39 +0200 Subject: [PATCH] fix(batch-exports): Do not allow backfills in the future (#24517) Co-authored-by: github-actions <41898282+github-actions[bot]@users.noreply.github.com> --- .../pipeline/BatchExportBackfillModal.tsx | 8 ++-- .../scenes/pipeline/batchExportRunsLogic.tsx | 27 +++++++++++ .../api/test/batch_exports/test_backfill.py | 45 +++++++++++++++++++ posthog/batch_exports/http.py | 6 +++ 4 files changed, 82 insertions(+), 4 deletions(-) diff --git a/frontend/src/scenes/pipeline/BatchExportBackfillModal.tsx b/frontend/src/scenes/pipeline/BatchExportBackfillModal.tsx index 41c0b9e0b48c7..0e9657c985f81 100644 --- a/frontend/src/scenes/pipeline/BatchExportBackfillModal.tsx +++ b/frontend/src/scenes/pipeline/BatchExportBackfillModal.tsx @@ -61,7 +61,7 @@ export function BatchExportBackfillModal({ id }: BatchExportRunsLogicProps): JSX // So, if a user of a daily export selects "2024-08-14" they mean "2024-08-14 00:00:00 in their // project's timezone". } - + {({ value, onChange }) => ( )} - + {({ value, onChange }) => ( ([ } } + let upperBound = dayjs().tz(teamLogic.values.timezone) + let period = '1 hour' + + if (values.batchExportConfig && end_at) { + if (values.batchExportConfig.interval == 'hour') { + upperBound = upperBound.add(1, 'hour') + } else if (values.batchExportConfig.interval == 'day') { + upperBound = upperBound.hour(0).minute(0).second(0) + upperBound = upperBound.add(1, 'day') + period = '1 day' + } else if (values.batchExportConfig.interval.endsWith('minutes')) { + // TODO: Make this generic for all minute frequencies. + // Currently, only 5 minute batch exports are supported. + upperBound = upperBound.add(5, 'minute') + period = '5 minutes' + } else { + upperBound = upperBound.add(1, 'hour') + } + + if (end_at > upperBound) { + lemonToast.error( + `Requested backfill end date lies too far into the future. Use an end date that is no more than ${period} from now (in your project's timezone)` + ) + return + } + } + await new Promise((resolve) => setTimeout(resolve, 1000)) await api.batchExports .createBackfill(props.id, { diff --git a/posthog/api/test/batch_exports/test_backfill.py b/posthog/api/test/batch_exports/test_backfill.py index e7a0eccadbc3a..8a760e5e7c0cd 100644 --- a/posthog/api/test/batch_exports/test_backfill.py +++ b/posthog/api/test/batch_exports/test_backfill.py @@ -1,5 +1,8 @@ +import datetime as dt + import pytest from django.test.client import Client as HttpClient +from freezegun import freeze_time from rest_framework import status from posthog.api.test.batch_exports.conftest import start_test_worker @@ -97,6 +100,48 @@ def test_batch_export_backfill_with_non_isoformatted_dates(client: HttpClient): assert response.status_code == status.HTTP_400_BAD_REQUEST, response.json() +def test_batch_export_backfill_with_end_at_in_the_future(client: HttpClient): + """Test a BatchExport backfill fails if we pass malformed dates.""" + temporal = sync_connect() + + destination_data = { + "type": "S3", + "config": { + "bucket_name": "my-production-s3-bucket", + "region": "us-east-1", + "prefix": "posthog-events/", + "aws_access_key_id": "abc123", + "aws_secret_access_key": "secret", + }, + } + batch_export_data = { + "name": "my-production-s3-bucket-destination", + "destination": destination_data, + "interval": "hour", + } + + organization = create_organization("Test Org") + team = create_team(organization) + user = create_user("test@user.com", "Test User", organization) + test_time = dt.datetime.now(dt.UTC) + client.force_login(user) + + with start_test_worker(temporal): + batch_export = create_batch_export_ok(client, team.pk, batch_export_data) + + batch_export_id = batch_export["id"] + + with freeze_time(test_time): + response = backfill_batch_export( + client, + team.pk, + batch_export_id, + test_time.isoformat(), + (test_time + dt.timedelta(hours=1, seconds=1)).isoformat(), + ) + assert response.status_code == status.HTTP_400_BAD_REQUEST, response.json() + + def test_batch_export_backfill_with_naive_bounds(client: HttpClient): """Test a BatchExport backfill fails if we naive dates.""" temporal = sync_connect() diff --git a/posthog/batch_exports/http.py b/posthog/batch_exports/http.py index 596bb2ba4252a..3783c4aab2d6b 100644 --- a/posthog/batch_exports/http.py +++ b/posthog/batch_exports/http.py @@ -390,6 +390,12 @@ def backfill(self, request: request.Request, *args, **kwargs) -> response.Respon raise ValidationError("The initial backfill datetime 'start_at' happens after 'end_at'") batch_export = self.get_object() + + if end_at > dt.datetime.now(dt.UTC) + batch_export.interval_time_delta: + raise ValidationError( + f"The provided 'end_at' ({end_at.isoformat()}) is too far into the future. Cannot backfill beyond 1 batch period into the future." + ) + temporal = sync_connect() try: backfill_id = backfill_export(temporal, str(batch_export.pk), self.team_id, start_at, end_at)