Skip to content

Commit

Permalink
feat(web-analytics): Sessions table backfill: Make printing counts beforehand optional (#21123)
Browse files Browse the repository at this point in the history

Make printing counts beforehand optional
  • Loading branch information
robbie-c authored Mar 24, 2024
1 parent 37f3ff3 commit 064f175
Showing 1 changed file with 25 additions and 8 deletions.
33 changes: 25 additions & 8 deletions posthog/management/commands/backfill_sessions_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ class BackfillQuery:
def execute(
self,
dry_run: bool = True,
print_counts: bool = True,
) -> None:
def source_column(column_name: str) -> str:
return get_property_string_expr(
Expand Down Expand Up @@ -112,9 +113,10 @@ def select_query(select_date: Optional[datetime] = None) -> str:
"""

# if print_counts is set, print the row count and unique session_id count of the sessions table before the backfill
count_query = f"SELECT count(), uniq(session_id) FROM {TARGET_TABLE}"
[(sessions_row_count, sessions_event_count)] = sync_execute(count_query, settings=SETTINGS)
logger.info(f"{sessions_row_count} rows and {sessions_event_count} unique session_ids in sessions table")
if print_counts:
count_query = f"SELECT count(), uniq(session_id) FROM {TARGET_TABLE}"
[(sessions_row_count, sessions_event_count)] = sync_execute(count_query, settings=SETTINGS)
logger.info(f"{sessions_row_count} rows and {sessions_event_count} unique session_ids in sessions table")

if dry_run:
count_query = f"SELECT count(), uniq(session_id) FROM ({select_query()})"
Expand All @@ -133,9 +135,10 @@ def select_query(select_date: Optional[datetime] = None) -> str:
)

# if print_counts is set, print the row count and unique session_id count of the sessions table again after the backfill
count_query = f"SELECT count(), uniq(session_id) FROM {TARGET_TABLE}"
[(sessions_row_count, sessions_event_count)] = sync_execute(count_query, settings=SETTINGS)
logger.info(f"{sessions_row_count} rows and {sessions_event_count} unique session_ids in sessions table")
if print_counts:
count_query = f"SELECT count(), uniq(session_id) FROM {TARGET_TABLE}"
[(sessions_row_count, sessions_event_count)] = sync_execute(count_query, settings=SETTINGS)
logger.info(f"{sessions_row_count} rows and {sessions_event_count} unique session_ids in sessions table")


class Command(BaseCommand):
Expand All @@ -154,11 +157,25 @@ def add_arguments(self, parser):
parser.add_argument(
"--use-offline-workload", action="store_true", help="actually execute INSERT queries (default is dry-run)"
)
parser.add_argument(
"--print-counts", action="store_true", help="print events and session count beforehand and afterwards"
)

def handle(self, *, live_run: bool, start_date: str, end_date: str, use_offline_workload: bool, **options):
def handle(
self,
*,
live_run: bool,
start_date: str,
end_date: str,
use_offline_workload: bool,
print_counts: bool,
**options,
):
logger.setLevel(logging.INFO)

start_datetime = datetime.strptime(start_date, "%Y-%m-%d")
end_datetime = datetime.strptime(end_date, "%Y-%m-%d")

BackfillQuery(start_datetime, end_datetime, use_offline_workload).execute(dry_run=not live_run)
BackfillQuery(start_datetime, end_datetime, use_offline_workload).execute(
dry_run=not live_run, print_counts=print_counts
)

0 comments on commit 064f175

Please sign in to comment.