From 1c0d49547a99903c4a168654202395273ba65524 Mon Sep 17 00:00:00 2001 From: Ted Kaemming <65315+tkaemming@users.noreply.github.com> Date: Tue, 26 Nov 2024 17:49:43 -0800 Subject: [PATCH] add to management command --- ee/clickhouse/materialized_columns/analyze.py | 3 ++- ee/clickhouse/materialized_columns/columns.py | 3 ++- ee/management/commands/materialize_columns.py | 11 ++++++++++- 3 files changed, 14 insertions(+), 3 deletions(-) diff --git a/ee/clickhouse/materialized_columns/analyze.py b/ee/clickhouse/materialized_columns/analyze.py index 43a1e83256912..bfae76ef2432c 100644 --- a/ee/clickhouse/materialized_columns/analyze.py +++ b/ee/clickhouse/materialized_columns/analyze.py @@ -171,6 +171,7 @@ def materialize_properties_task( backfill_period_days: int = MATERIALIZE_COLUMNS_BACKFILL_PERIOD_DAYS, dry_run: bool = False, team_id_to_analyze: Optional[int] = None, + is_nullable: bool = False, ) -> None: """ Creates materialized columns for event and person properties based off of slow queries @@ -203,7 +204,7 @@ def materialize_properties_task( logger.info(f"Materializing column. table={table}, property_name={property_name}") if not dry_run: - materialize(table, property_name, table_column=table_column) + materialize(table, property_name, table_column=table_column, is_nullable=is_nullable) properties[table].append((property_name, table_column)) if backfill_period_days > 0 and not dry_run: diff --git a/ee/clickhouse/materialized_columns/columns.py b/ee/clickhouse/materialized_columns/columns.py index ee68f02650d5a..25d588e3e7180 100644 --- a/ee/clickhouse/materialized_columns/columns.py +++ b/ee/clickhouse/materialized_columns/columns.py @@ -233,6 +233,7 @@ def materialize( column_name: ColumnName | None = None, table_column: TableColumn = DEFAULT_TABLE_COLUMN, create_minmax_index=not TEST, + is_nullable: bool = False, ) -> ColumnName | None: if (property, table_column) in get_materialized_columns(table): if TEST: @@ -253,7 +254,7 @@ def materialize( property_name=property, is_disabled=False, ), - is_nullable=False, # TODO + is_nullable=is_nullable, ) table_info.map_data_nodes( diff --git a/ee/management/commands/materialize_columns.py b/ee/management/commands/materialize_columns.py index c1ca3b3fd2287..5ddbf55dea2b7 100644 --- a/ee/management/commands/materialize_columns.py +++ b/ee/management/commands/materialize_columns.py @@ -1,3 +1,4 @@ +import argparse import logging from django.core.management.base import BaseCommand @@ -69,8 +70,14 @@ def add_arguments(self, parser): default=MATERIALIZE_COLUMNS_MAX_AT_ONCE, help="Max number of columns to materialize via single invocation. Same as MATERIALIZE_COLUMNS_MAX_AT_ONCE env variable.", ) + parser.add_argument( + "--nullable", + action=argparse.BooleanOptionalAction, + default=True, + dest="is_nullable", + ) - def handle(self, *args, **options): + def handle(self, *, is_nullable: bool, **options): logger.setLevel(logging.INFO) if options["dry_run"]: @@ -90,6 +97,7 @@ def handle(self, *args, **options): ], backfill_period_days=options["backfill_period"], dry_run=options["dry_run"], + is_nullable=is_nullable, ) else: materialize_properties_task( @@ -99,4 +107,5 @@ def handle(self, *args, **options): backfill_period_days=options["backfill_period"], dry_run=options["dry_run"], team_id_to_analyze=options["analyze_team_id"], + is_nullable=is_nullable, )