Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Normalize is_iceberg Field for Consistency in Snowflake with QUOTED_IDENTIFIERS_IGNORE_CASE #1229

Merged
merged 9 commits into from
Nov 10, 2024
6 changes: 6 additions & 0 deletions .changes/unreleased/Fixes-20241104-172349.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
kind: Fixes
body: Iceberg quoting ignore fix.
time: 2024-11-04T17:23:49.706297-08:00
custom:
Author: versusfacit
Issue: "1227"
5 changes: 5 additions & 0 deletions dbt/adapters/snowflake/impl.py
Original file line number Diff line number Diff line change
Expand Up @@ -263,6 +263,11 @@ def list_relations_without_caching(
# this can be collapsed once Snowflake adds is_iceberg to show objects
columns = ["database_name", "schema_name", "name", "kind", "is_dynamic"]
if self.behavior.enable_iceberg_materializations.no_warn:
# The QUOTED_IDENTIFIERS_IGNORE_CASE setting impacts column names like
# is_iceberg which is created by dbt, but it does not affect the case
# of column values in Snowflake's SHOW OBJECTS query! This
# normalization step ensures metadata queries are handled consistently.
schema_objects = schema_objects.rename(column_names={"IS_ICEBERG": "is_iceberg"})
mikealfare marked this conversation as resolved.
Show resolved Hide resolved
columns.append("is_iceberg")

return [self._parse_list_relations_result(obj) for obj in schema_objects.select(columns)]
Expand Down
3 changes: 1 addition & 2 deletions dbt/include/snowflake/macros/adapters.sql
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,6 @@
{% endmacro %}

{% macro snowflake__list_relations_without_caching(schema_relation, max_iter=10, max_results_per_iter=10000) %}

{%- set max_total_results = max_results_per_iter * max_iter -%}
{%- set sql -%}
{% if schema_relation is string %}
Expand All @@ -147,7 +146,7 @@
{# -- Gated for performance reason. If you don't want Iceberg, you shouldn't pay the
-- latency penalty. #}
{% if adapter.behavior.enable_iceberg_materializations.no_warn %}
select all_objects.*, is_iceberg as "is_iceberg"
select all_objects.*, is_iceberg
Copy link
Contributor Author

@VersusFacit VersusFacit Nov 6, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I tested this by manually place QUOTED_IDENTIFIERS_IGNORE_CASE = true and QUOTED_IDENTIFIERS_IGNORE_CASE = false and then verifying it didn't matter. This is a more predictable way to handle metadata query column names.

from table(result_scan(last_query_id(-1))) all_objects
left join INFORMATION_SCHEMA.tables as all_tables
on all_tables.table_name = all_objects."name"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@

import pytest

from pathlib import Path

from dbt.adapters.factory import get_adapter_by_type
from dbt.adapters.snowflake import SnowflakeRelation

Expand Down Expand Up @@ -41,8 +43,32 @@
"""
)

_MODEL_ICEBERG = """
{{
config(
materialized = "table",
table_format="iceberg",
external_volume="s3_iceberg_snow",
)
}}

select 1
"""


class ShowObjectsBase:
@staticmethod
def list_relations_without_caching(project) -> List[SnowflakeRelation]:
my_adapter = get_adapter_by_type("snowflake")
schema = my_adapter.Relation.create(
database=project.database, schema=project.test_schema, identifier=""
)
with get_connection(my_adapter):
relations = my_adapter.list_relations_without_caching(schema)
return relations


class TestShowObjects:
class TestShowObjects(ShowObjectsBase):
views: int = 10
tables: int = 10
dynamic_tables: int = 10
Expand All @@ -66,16 +92,6 @@ def setup(self, project):
run_dbt(["seed"])
run_dbt(["run"])

@staticmethod
def list_relations_without_caching(project) -> List[SnowflakeRelation]:
my_adapter = get_adapter_by_type("snowflake")
schema = my_adapter.Relation.create(
database=project.database, schema=project.test_schema, identifier=""
)
with get_connection(my_adapter):
relations = my_adapter.list_relations_without_caching(schema)
return relations

def test_list_relations_without_caching(self, project):
relations = self.list_relations_without_caching(project)
assert len([relation for relation in relations if relation.is_view]) == self.views
Expand All @@ -87,3 +103,25 @@ def test_list_relations_without_caching(self, project):
len([relation for relation in relations if relation.is_dynamic_table])
== self.dynamic_tables
)


class TestShowIcebergObjects(ShowObjectsBase):
@pytest.fixture(scope="class")
def project_config_update(self):
return {"flags": {"enable_iceberg_materializations": True}}

@pytest.fixture(scope="class")
def models(self):
return {"my_model.sql": _MODEL_ICEBERG}

def test_quoting_ignore_flag_doesnt_break_iceberg_metadata(self, project):
"""https://github.com/dbt-labs/dbt-snowflake/issues/1227

The list relations function involves a metadata sub-query. Regardless of
QUOTED_IDENTIFIERS_IGNORE_CASE, this function will fail without proper
normalization within the encapsulating python function after the macro invocation
returns. This test verifies that normalization is working.
"""
run_dbt(["run"])

self.list_relations_without_caching(project)