Skip to content

Commit

Permalink
fix(batch-exports): Do not allow backfills in the future (#24517)
Browse files Browse the repository at this point in the history
Co-authored-by: github-actions <41898282+github-actions[bot]@users.noreply.github.com>
  • Loading branch information
tomasfarias and github-actions[bot] authored Aug 22, 2024
1 parent 1d7949e commit dcf38bb
Show file tree
Hide file tree
Showing 4 changed files with 82 additions and 4 deletions.
8 changes: 4 additions & 4 deletions frontend/src/scenes/pipeline/BatchExportBackfillModal.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -61,15 +61,15 @@ export function BatchExportBackfillModal({ id }: BatchExportRunsLogicProps): JSX
// So, if a user of a daily export selects "2024-08-14" they mean "2024-08-14 00:00:00 in their
// project's timezone".
}
<LemonField name="start_at" label="Start Date" className="flex-1">
<LemonField name="start_at" label={`Start Date (${timezone})`} className="flex-1">
{({ value, onChange }) => (
<LemonCalendarSelectInput
value={
value
? batchExportConfig
? batchExportConfig.interval === 'day'
? value.hour(0).minute(0).second(0)
: value
: value.tz(timezone)
: value
: value
}
Expand Down Expand Up @@ -99,15 +99,15 @@ export function BatchExportBackfillModal({ id }: BatchExportRunsLogicProps): JSX
/>
)}
</LemonField>
<LemonField name="end_at" label="End date" className="flex-1">
<LemonField name="end_at" label={`End Date (${timezone})`} className="flex-1">
{({ value, onChange }) => (
<LemonCalendarSelectInput
value={
value
? batchExportConfig
? batchExportConfig.interval === 'day'
? value.hour(0).minute(0).second(0)
: value
: value.tz(timezone)
: value
: value
}
Expand Down
27 changes: 27 additions & 0 deletions frontend/src/scenes/pipeline/batchExportRunsLogic.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,33 @@ export const batchExportRunsLogic = kea<batchExportRunsLogicType>([
}
}

let upperBound = dayjs().tz(teamLogic.values.timezone)
let period = '1 hour'

if (values.batchExportConfig && end_at) {
if (values.batchExportConfig.interval == 'hour') {
upperBound = upperBound.add(1, 'hour')
} else if (values.batchExportConfig.interval == 'day') {
upperBound = upperBound.hour(0).minute(0).second(0)
upperBound = upperBound.add(1, 'day')
period = '1 day'
} else if (values.batchExportConfig.interval.endsWith('minutes')) {
// TODO: Make this generic for all minute frequencies.
// Currently, only 5 minute batch exports are supported.
upperBound = upperBound.add(5, 'minute')
period = '5 minutes'
} else {
upperBound = upperBound.add(1, 'hour')
}

if (end_at > upperBound) {
lemonToast.error(
`Requested backfill end date lies too far into the future. Use an end date that is no more than ${period} from now (in your project's timezone)`
)
return
}
}

await new Promise((resolve) => setTimeout(resolve, 1000))
await api.batchExports
.createBackfill(props.id, {
Expand Down
45 changes: 45 additions & 0 deletions posthog/api/test/batch_exports/test_backfill.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
import datetime as dt

import pytest
from django.test.client import Client as HttpClient
from freezegun import freeze_time
from rest_framework import status

from posthog.api.test.batch_exports.conftest import start_test_worker
Expand Down Expand Up @@ -97,6 +100,48 @@ def test_batch_export_backfill_with_non_isoformatted_dates(client: HttpClient):
assert response.status_code == status.HTTP_400_BAD_REQUEST, response.json()


def test_batch_export_backfill_with_end_at_in_the_future(client: HttpClient):
"""Test a BatchExport backfill fails if we pass malformed dates."""
temporal = sync_connect()

destination_data = {
"type": "S3",
"config": {
"bucket_name": "my-production-s3-bucket",
"region": "us-east-1",
"prefix": "posthog-events/",
"aws_access_key_id": "abc123",
"aws_secret_access_key": "secret",
},
}
batch_export_data = {
"name": "my-production-s3-bucket-destination",
"destination": destination_data,
"interval": "hour",
}

organization = create_organization("Test Org")
team = create_team(organization)
user = create_user("[email protected]", "Test User", organization)
test_time = dt.datetime.now(dt.UTC)
client.force_login(user)

with start_test_worker(temporal):
batch_export = create_batch_export_ok(client, team.pk, batch_export_data)

batch_export_id = batch_export["id"]

with freeze_time(test_time):
response = backfill_batch_export(
client,
team.pk,
batch_export_id,
test_time.isoformat(),
(test_time + dt.timedelta(hours=1, seconds=1)).isoformat(),
)
assert response.status_code == status.HTTP_400_BAD_REQUEST, response.json()


def test_batch_export_backfill_with_naive_bounds(client: HttpClient):
"""Test a BatchExport backfill fails if we naive dates."""
temporal = sync_connect()
Expand Down
6 changes: 6 additions & 0 deletions posthog/batch_exports/http.py
Original file line number Diff line number Diff line change
Expand Up @@ -390,6 +390,12 @@ def backfill(self, request: request.Request, *args, **kwargs) -> response.Respon
raise ValidationError("The initial backfill datetime 'start_at' happens after 'end_at'")

batch_export = self.get_object()

if end_at > dt.datetime.now(dt.UTC) + batch_export.interval_time_delta:
raise ValidationError(
f"The provided 'end_at' ({end_at.isoformat()}) is too far into the future. Cannot backfill beyond 1 batch period into the future."
)

temporal = sync_connect()
try:
backfill_id = backfill_export(temporal, str(batch_export.pk), self.team_id, start_at, end_at)
Expand Down

0 comments on commit dcf38bb

Please sign in to comment.