-
Notifications
You must be signed in to change notification settings - Fork 1.4k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
feat: Support nullable materialized columns using native types #26448
Merged
Merged
Changes from 14 commits
Commits
Show all changes
17 commits
Select commit
Hold shift + click to select a range
19bf4b7
consolidate column type and expression logic
tkaemming 24caa0c
reduce surface area of get_enabled_materialized_columns
tkaemming 3108d15
get_materialized_columns should return columns
tkaemming 1a9202a
move filtering up the stack
tkaemming a2314e8
expose MaterializedColumn protocol for nullability's sake
tkaemming 3c44b03
update hogql printer to handle nullable columns
tkaemming aab924f
oops
tkaemming 6f611e4
DAMN
tkaemming a8269bc
add to management command
tkaemming c42bff5
add test for printer behavior
tkaemming 5de94b8
reduce queries executed due to reordering instance setting check
tkaemming 684c4a4
update mocks
tkaemming ab96f6c
mypy baseline update
tkaemming 6924ecf
type test
tkaemming f35ad60
use the thing that exists that does the thing
tkaemming 78d6041
Merge branch 'master' into column-types
tkaemming 4f59fd7
Update query snapshots
github-actions[bot] File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -5,18 +5,18 @@ | |
from copy import copy | ||
from dataclasses import dataclass, replace | ||
from datetime import timedelta | ||
from typing import Any, Literal, NamedTuple, TypeVar, cast | ||
from typing import Any, Literal, TypeVar, cast | ||
|
||
from clickhouse_driver import Client | ||
from django.utils.timezone import now | ||
|
||
from posthog.cache_utils import cache_for | ||
from posthog.clickhouse.client.connection import default_client | ||
from posthog.clickhouse.cluster import ClickhouseCluster, ConnectionInfo, FuturesMap, HostInfo | ||
from posthog.clickhouse.kafka_engine import trim_quotes_expr | ||
from posthog.clickhouse.materialized_columns import ColumnName, TablesWithMaterializedColumns | ||
from posthog.client import sync_execute | ||
from posthog.models.event.sql import EVENTS_DATA_TABLE | ||
from posthog.models.instance_setting import get_instance_setting | ||
from posthog.models.person.sql import PERSONS_TABLE | ||
from posthog.models.property import PropertyName, TableColumn, TableWithProperties | ||
from posthog.models.utils import generate_random_short_suffix | ||
|
@@ -26,8 +26,6 @@ | |
|
||
DEFAULT_TABLE_COLUMN: Literal["properties"] = "properties" | ||
|
||
TRIM_AND_EXTRACT_PROPERTY = trim_quotes_expr("JSONExtractRaw({table_column}, %(property)s)") | ||
|
||
SHORT_TABLE_COLUMN_NAME = { | ||
"properties": "p", | ||
"group_properties": "gp", | ||
|
@@ -40,15 +38,36 @@ | |
} | ||
|
||
|
||
class MaterializedColumn(NamedTuple): | ||
@dataclass | ||
class MaterializedColumn: | ||
name: ColumnName | ||
details: MaterializedColumnDetails | ||
is_nullable: bool | ||
|
||
@property | ||
def type(self) -> str: | ||
if self.is_nullable: | ||
return "Nullable(String)" | ||
else: | ||
return "String" | ||
|
||
def get_expression_and_parameters(self) -> tuple[str, dict[str, Any]]: | ||
if self.is_nullable: | ||
return ( | ||
f"JSONExtract({self.details.table_column}, %(property_name)s, %(property_type)s)", | ||
{"property_name": self.details.property_name, "property_type": self.type}, | ||
) | ||
else: | ||
return ( | ||
trim_quotes_expr(f"JSONExtractRaw({self.details.table_column}, %(property)s)"), | ||
{"property": self.details.property_name}, | ||
) | ||
|
||
@staticmethod | ||
def get_all(table: TablesWithMaterializedColumns) -> Iterator[MaterializedColumn]: | ||
rows = sync_execute( | ||
""" | ||
SELECT name, comment | ||
SELECT name, comment, type like 'Nullable(%%)' as is_nullable | ||
FROM system.columns | ||
WHERE database = %(database)s | ||
AND table = %(table)s | ||
|
@@ -58,8 +77,8 @@ def get_all(table: TablesWithMaterializedColumns) -> Iterator[MaterializedColumn | |
{"database": CLICKHOUSE_DATABASE, "table": table}, | ||
) | ||
|
||
for name, comment in rows: | ||
yield MaterializedColumn(name, MaterializedColumnDetails.from_column_comment(comment)) | ||
for name, comment, is_nullable in rows: | ||
yield MaterializedColumn(name, MaterializedColumnDetails.from_column_comment(comment), is_nullable) | ||
|
||
@staticmethod | ||
def get(table: TablesWithMaterializedColumns, column_name: ColumnName) -> MaterializedColumn: | ||
|
@@ -111,18 +130,20 @@ def from_column_comment(cls, comment: str) -> MaterializedColumnDetails: | |
|
||
def get_materialized_columns( | ||
table: TablesWithMaterializedColumns, | ||
exclude_disabled_columns: bool = False, | ||
) -> dict[tuple[PropertyName, TableColumn], ColumnName]: | ||
if not get_instance_setting("MATERIALIZED_COLUMNS_ENABLED"): | ||
return {} | ||
|
||
) -> dict[tuple[PropertyName, TableColumn], MaterializedColumn]: | ||
return { | ||
(column.details.property_name, column.details.table_column): column.name | ||
(column.details.property_name, column.details.table_column): column | ||
for column in MaterializedColumn.get_all(table) | ||
if not (exclude_disabled_columns and column.details.is_disabled) | ||
} | ||
|
||
|
||
@cache_for(timedelta(minutes=15)) | ||
def get_enabled_materialized_columns( | ||
table: TablesWithMaterializedColumns, | ||
) -> dict[tuple[PropertyName, TableColumn], MaterializedColumn]: | ||
return {k: column for k, column in get_materialized_columns(table).items() if not column.details.is_disabled} | ||
|
||
|
||
def get_cluster() -> ClickhouseCluster: | ||
extra_hosts = [] | ||
for host_config in map(copy, CLICKHOUSE_PER_TEAM_SETTINGS.values()): | ||
|
@@ -169,13 +190,10 @@ class CreateColumnOnDataNodesTask: | |
add_column_comment: bool | ||
|
||
def execute(self, client: Client) -> None: | ||
expression, parameters = self.column.get_expression_and_parameters() | ||
actions = [ | ||
f""" | ||
ADD COLUMN IF NOT EXISTS {self.column.name} VARCHAR | ||
MATERIALIZED {TRIM_AND_EXTRACT_PROPERTY.format(table_column=self.column.details.table_column)} | ||
""", | ||
f"ADD COLUMN IF NOT EXISTS {self.column.name} {self.column.type} MATERIALIZED {expression}", | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. :chef-kiss: |
||
] | ||
parameters = {"property": self.column.details.property_name} | ||
|
||
if self.add_column_comment: | ||
actions.append(f"COMMENT COLUMN {self.column.name} %(comment)s") | ||
|
@@ -201,7 +219,7 @@ def execute(self, client: Client) -> None: | |
client.execute( | ||
f""" | ||
ALTER TABLE {self.table} | ||
ADD COLUMN IF NOT EXISTS {self.column.name} VARCHAR, | ||
ADD COLUMN IF NOT EXISTS {self.column.name} {self.column.type}, | ||
COMMENT COLUMN {self.column.name} %(comment)s | ||
""", | ||
{"comment": self.column.details.as_column_comment()}, | ||
|
@@ -215,6 +233,7 @@ def materialize( | |
column_name: ColumnName | None = None, | ||
table_column: TableColumn = DEFAULT_TABLE_COLUMN, | ||
create_minmax_index=not TEST, | ||
is_nullable: bool = False, | ||
) -> ColumnName | None: | ||
if (property, table_column) in get_materialized_columns(table): | ||
if TEST: | ||
|
@@ -235,6 +254,7 @@ def materialize( | |
property_name=property, | ||
is_disabled=False, | ||
), | ||
is_nullable=is_nullable, | ||
) | ||
|
||
table_info.map_data_nodes( | ||
|
@@ -275,16 +295,12 @@ def update_column_is_disabled(table: TablesWithMaterializedColumns, column_name: | |
cluster = get_cluster() | ||
table_info = tables[table] | ||
|
||
column = MaterializedColumn.get(table, column_name) | ||
|
||
cluster.map_all_hosts( | ||
UpdateColumnCommentTask( | ||
table_info.read_table, | ||
MaterializedColumn( | ||
name=column_name, | ||
details=replace( | ||
MaterializedColumn.get(table, column_name).details, | ||
is_disabled=is_disabled, | ||
), | ||
), | ||
replace(column, details=replace(column.details, is_disabled=is_disabled)), | ||
).execute | ||
).result() | ||
|
||
|
@@ -345,12 +361,13 @@ def execute(self, client: Client) -> None: | |
# Note that for this to work all inserts should list columns explicitly | ||
# Improve this if https://github.com/ClickHouse/ClickHouse/issues/27730 ever gets resolved | ||
for column in self.columns: | ||
expression, parameters = column.get_expression_and_parameters() | ||
client.execute( | ||
f""" | ||
ALTER TABLE {self.table} | ||
MODIFY COLUMN {column.name} VARCHAR DEFAULT {TRIM_AND_EXTRACT_PROPERTY.format(table_column=column.details.table_column)} | ||
MODIFY COLUMN {column.name} {column.type} DEFAULT {expression} | ||
""", | ||
{"property": column.details.property_name}, | ||
parameters, | ||
settings=self.test_settings, | ||
) | ||
|
||
|
@@ -420,10 +437,10 @@ def _materialized_column_name( | |
prefix += f"{SHORT_TABLE_COLUMN_NAME[table_column]}_" | ||
property_str = re.sub("[^0-9a-zA-Z$]", "_", property) | ||
|
||
existing_materialized_columns = set(get_materialized_columns(table).values()) | ||
existing_materialized_column_names = {column.name for column in get_materialized_columns(table).values()} | ||
suffix = "" | ||
|
||
while f"{prefix}{property_str}{suffix}" in existing_materialized_columns: | ||
while f"{prefix}{property_str}{suffix}" in existing_materialized_column_names: | ||
suffix = "_" + generate_random_short_suffix() | ||
|
||
return f"{prefix}{property_str}{suffix}" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'll probably consolidate these two classes (`MaterializedColumn` & `MaterializedColumnDetails`) at some point (there's not much benefit to them being separate at this point), but I'm not in a big hurry to do that.