Skip to content

Commit

Permalink
feat(web-analytics): Sessions backfill: add settings to increase execution time (#21121)
Browse files Browse the repository at this point in the history

Add settings to increase execution time
  • Loading branch information
robbie-c authored Mar 24, 2024
1 parent a63fd08 commit f2c2563
Showing 1 changed file with 8 additions and 3 deletions.
11 changes: 8 additions & 3 deletions posthog/management/commands/backfill_sessions_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,10 @@

TARGET_TABLE = "sessions"

# Query settings handed to `sync_execute` through its `settings=` argument so
# that the backfill's long-running queries are allowed up to an hour.
SETTINGS = {
    "max_execution_time": 60 * 60,  # 1 hour
}


@dataclass
class BackfillQuery:
Expand Down Expand Up @@ -109,12 +113,12 @@ def select_query(select_date: Optional[datetime] = None) -> str:

# print the count of entries in the main sessions table
count_query = f"SELECT count(), uniq(session_id) FROM {TARGET_TABLE}"
[(sessions_row_count, sessions_event_count)] = sync_execute(count_query)
[(sessions_row_count, sessions_event_count)] = sync_execute(count_query, settings=SETTINGS)
logger.info(f"{sessions_row_count} rows and {sessions_event_count} unique session_ids in sessions table")

if dry_run:
count_query = f"SELECT count(), uniq(session_id) FROM ({select_query()})"
[(events_count, sessions_count)] = sync_execute(count_query)
[(events_count, sessions_count)] = sync_execute(count_query, settings=SETTINGS)
logger.info(f"{events_count} events and {sessions_count} sessions to backfill for")
logger.info(f"The first select query would be:\n{select_query(self.start_date)}")
return
Expand All @@ -125,11 +129,12 @@ def select_query(select_date: Optional[datetime] = None) -> str:
sync_execute(
query=f"""INSERT INTO writable_sessions {select_query(select_date=date)} SETTINGS max_execution_time=3600""",
workload=Workload.OFFLINE if self.use_offline_workload else Workload.DEFAULT,
settings=SETTINGS,
)

# print the count of entries in the main sessions table
count_query = f"SELECT count(), uniq(session_id) FROM {TARGET_TABLE}"
[(sessions_row_count, sessions_event_count)] = sync_execute(count_query)
[(sessions_row_count, sessions_event_count)] = sync_execute(count_query, settings=SETTINGS)
logger.info(f"{sessions_row_count} rows and {sessions_event_count} unique session_ids in sessions table")


Expand Down

0 comments on commit f2c2563

Please sign in to comment.