chore(data-warehouse): add clause for 1 day sync (#21505)

add clause for 1 day sync
PostHog · Apr 12, 2024 · 9db5f50 · 9db5f50
1 parent 6ebb971
commit 9db5f50
Show file tree

Hide file tree

Showing 2 changed files with 29 additions and 1 deletion.
diff --git a/posthog/temporal/data_imports/external_data_job.py b/posthog/temporal/data_imports/external_data_job.py
@@ -30,9 +30,11 @@
 )
 from posthog.warehouse.models.external_data_schema import get_postgres_schemas
 from posthog.temporal.common.logger import bind_temporal_worker_logger
+from posthog.utils import get_instance_region
 from typing import Dict, Tuple
 import asyncio
 from django.conf import settings
+from django.utils import timezone
 
 
 @dataclasses.dataclass
@@ -188,12 +190,25 @@ async def run_external_data_job(inputs: ExternalDataJobInputs) -> Tuple[TSchemaT
         # until we require re update of account_ids in stripe so they're all store
         if not stripe_secret_key:
             raise ValueError(f"Stripe secret key not found for job {model.id}")
+
+        # Hacky just for specific user
+        region = get_instance_region()
+        if region == "EU" and inputs.team_id == 11870:
+            prev_day = timezone.now() - dt.timedelta(days=1)
+            start_date = prev_day.replace(hour=0, minute=0, second=0, microsecond=0)
+            end_date = start_date + dt.timedelta(1)
+        else:
+            start_date = None
+            end_date = None
+
         source = stripe_source(
             api_key=stripe_secret_key,
             account_id=account_id,
             endpoints=tuple(endpoints),
             team_id=inputs.team_id,
             job_id=inputs.run_id,
+            start_date=start_date,
+            end_date=end_date,
         )
     elif model.pipeline.source_type == ExternalDataSource.Type.HUBSPOT:
         from posthog.temporal.data_imports.pipelines.hubspot.auth import refresh_access_token

diff --git a/posthog/temporal/data_imports/pipelines/stripe/helpers.py b/posthog/temporal/data_imports/pipelines/stripe/helpers.py
@@ -61,6 +61,8 @@ async def stripe_pagination(
     team_id: int,
     job_id: str,
     starting_after: Optional[str] = None,
+    start_date: Optional[Any] = None,
+    end_date: Optional[Any] = None,
 ):
     """
     Retrieves data from an endpoint with pagination.
@@ -88,6 +90,8 @@ async def stripe_pagination(
             account_id,
             endpoint,
             starting_after=starting_after,
+            start_date=start_date,
+            end_date=end_date,
         )
 
         if len(response["data"]) > 0:
@@ -106,7 +110,14 @@ async def stripe_pagination(
 
 @dlt.source(max_table_nesting=0)
 def stripe_source(
-    api_key: str, account_id: str, endpoints: Tuple[str, ...], team_id, job_id, starting_after: Optional[str] = None
+    api_key: str,
+    account_id: str,
+    endpoints: Tuple[str, ...],
+    team_id,
+    job_id,
+    starting_after: Optional[str] = None,
+    start_date: Optional[Any] = None,
+    end_date: Optional[Any] = None,
 ) -> Iterable[DltResource]:
     for endpoint in endpoints:
         yield dlt.resource(
@@ -120,4 +131,6 @@ def stripe_source(
             team_id=team_id,
             job_id=job_id,
             starting_after=starting_after,
+            start_date=start_date,
+            end_date=end_date,
         )