From 3ad6d2ab0f119a684d3a851ba23807eb75dbda4b Mon Sep 17 00:00:00 2001 From: Noritaka Sekiyama Date: Fri, 10 Nov 2023 16:36:44 +0900 Subject: [PATCH 1/5] Added unit test for test_hudi_merge_table with Mock using moto and mock_glue --- dev-requirements.txt | 1 + tests/functional/adapter/test_constraints.py | 0 tests/unit/constants.py | 4 + tests/unit/test_adapter.py | 36 ++++++++ tests/unit/utils.py | 96 ++++++++++++++++++++ 5 files changed, 137 insertions(+) create mode 100644 tests/functional/adapter/test_constraints.py create mode 100644 tests/unit/constants.py create mode 100644 tests/unit/utils.py diff --git a/dev-requirements.txt b/dev-requirements.txt index 4501ad95..2027bd1d 100644 --- a/dev-requirements.txt +++ b/dev-requirements.txt @@ -24,6 +24,7 @@ black==23.11.0 # Adapter specific dependencies waiter boto3 +moto~=4.2.7 dbt-core~=1.7.1 dbt-spark~=1.7.1 diff --git a/tests/functional/adapter/test_constraints.py b/tests/functional/adapter/test_constraints.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/unit/constants.py b/tests/unit/constants.py new file mode 100644 index 00000000..9c4cf47f --- /dev/null +++ b/tests/unit/constants.py @@ -0,0 +1,4 @@ +CATALOG_ID = "1234567890101" +DATABASE_NAME = "test_dbt_glue" +BUCKET_NAME = "test-dbt-glue" +AWS_REGION = "us-east-1" diff --git a/tests/unit/test_adapter.py b/tests/unit/test_adapter.py index 0e9b19fd..5032d57f 100644 --- a/tests/unit/test_adapter.py +++ b/tests/unit/test_adapter.py @@ -1,12 +1,15 @@ from typing import Any, Dict, Optional import unittest from unittest import mock +from moto import mock_glue from dbt.config import RuntimeConfig import dbt.flags as flags from dbt.adapters.glue import GlueAdapter +from dbt.adapters.glue.relation import SparkRelation from tests.util import config_from_parts_or_dicts +from .utils import MockAWSService class TestGlueAdapter(unittest.TestCase): @@ -58,3 +61,36 @@ def test_glue_connection(self): self.assertEqual(connection.type, "glue") self.assertEqual(connection.credentials.schema, "dbt_functional_test_01") self.assertIsNotNone(connection.handle) + + + @mock_glue + def test_get_table_type(self): + config = self._get_config() + adapter = GlueAdapter(config) + + database_name = "dbt_unit_test_01" + table_name = "test_table" + mock_aws_service = MockAWSService() + mock_aws_service.create_database(name=database_name) + mock_aws_service.create_iceberg_table(table_name=table_name, database_name=database_name) + target_relation = SparkRelation.create( + schema=database_name, + identifier=table_name, + ) + with mock.patch("dbt.adapters.glue.connections.open"): + connection = adapter.acquire_connection("dummy") + connection.handle # trigger lazy-load + self.assertEqual(adapter.get_table_type(target_relation), "iceberg_table") + + @mock_glue + def test_hudi_merge_table(self): + config = self._get_config() + adapter = GlueAdapter(config) + target_relation = SparkRelation.create( + schema="dbt_functional_test_01", + name="test_hudi_merge_table", + ) + with mock.patch("dbt.adapters.glue.connections.open"): + connection = adapter.acquire_connection("dummy") + connection.handle # trigger lazy-load + adapter.hudi_merge_table(target_relation, "SELECT 1", "id", "category", "empty", None, None) diff --git a/tests/unit/utils.py b/tests/unit/utils.py new file mode 100644 index 00000000..62a9cf09 --- /dev/null +++ b/tests/unit/utils.py @@ -0,0 +1,96 @@ +from typing import Optional +import boto3 + +from .constants import AWS_REGION, BUCKET_NAME, CATALOG_ID, DATABASE_NAME + + +class MockAWSService: + def create_database(self, name: str = DATABASE_NAME, catalog_id: str = CATALOG_ID): + glue = boto3.client("glue", region_name=AWS_REGION) + glue.create_database(DatabaseInput={"Name": name}, CatalogId=catalog_id) + + def create_table( + self, + table_name: str, + database_name: str = DATABASE_NAME, + catalog_id: str = CATALOG_ID, + location: Optional[str] = "auto", + ): + glue = boto3.client("glue", region_name=AWS_REGION) + if location == "auto": + location = f"s3://{BUCKET_NAME}/tables/{table_name}" + glue.create_table( + CatalogId=catalog_id, + DatabaseName=database_name, + TableInput={ + "Name": table_name, + "StorageDescriptor": { + "Columns": [ + { + "Name": "id", + "Type": "string", + }, + { + "Name": "country", + "Type": "string", + }, + ], + "Location": location, + }, + "PartitionKeys": [ + { + "Name": "dt", + "Type": "date", + }, + ], + "TableType": "table", + "Parameters": { + "compressionType": "snappy", + "classification": "parquet", + "projection.enabled": "false", + "typeOfData": "file", + }, + }, + ) + + def create_iceberg_table( + self, + table_name: str, + database_name: str = DATABASE_NAME, + catalog_id: str = CATALOG_ID): + glue = boto3.client("glue", region_name=AWS_REGION) + glue.create_table( + CatalogId=catalog_id, + DatabaseName=database_name, + TableInput={ + "Name": table_name, + "StorageDescriptor": { + "Columns": [ + { + "Name": "id", + "Type": "string", + }, + { + "Name": "country", + "Type": "string", + }, + { + "Name": "dt", + "Type": "date", + }, + ], + "Location": f"s3://{BUCKET_NAME}/tables/data/{table_name}", + }, + "PartitionKeys": [ + { + "Name": "dt", + "Type": "date", + }, + ], + "TableType": "EXTERNAL_TABLE", + "Parameters": { + "metadata_location": f"s3://{BUCKET_NAME}/tables/metadata/{table_name}/123.json", + "table_type": "iceberg", + }, + }, + ) From e61215ecf40e69cb917120e13ec175b82584415d Mon Sep 17 00:00:00 2001 From: Noritaka Sekiyama Date: Fri, 10 Nov 2023 16:37:30 +0900 Subject: [PATCH 2/5] Moved S3Url util class to util.py --- tests/functional/adapter/test_basic.py | 47 +++----------------------- tests/util.py | 37 ++++++++++++++++++++ 2 files changed, 42 insertions(+), 42 deletions(-) diff --git a/tests/functional/adapter/test_basic.py b/tests/functional/adapter/test_basic.py index 87e40140..74adf164 100644 --- a/tests/functional/adapter/test_basic.py +++ b/tests/functional/adapter/test_basic.py @@ -1,8 +1,6 @@ import pytest -import boto3 import os -from urllib.parse import urlparse from dbt.tests.adapter.basic.test_base import BaseSimpleMaterializations from dbt.tests.adapter.basic.test_singular_tests import BaseSingularTests from dbt.tests.adapter.basic.test_singular_tests_ephemeral import BaseSingularTestsEphemeral @@ -29,7 +27,7 @@ check_relations_equal, ) -from tests.util import get_s3_location, get_region +from tests.util import get_s3_location, get_region, cleanup_s3_location s3bucket = get_s3_location() @@ -61,11 +59,6 @@ base_materialized_var_sql = config_materialized_var + config_incremental_strategy + model_base -def cleanup_s3_location(): - client = boto3.client("s3", region_name=region) - S3Url(s3bucket + schema_name).delete_all_keys_v2(client) - - class TestSimpleMaterializationsGlue(BaseSimpleMaterializations): # all tests within this test has the same schema @pytest.fixture(scope="class") @@ -92,7 +85,7 @@ def models(self): @pytest.fixture(scope='class', autouse=True) def cleanup(self): - cleanup_s3_location() + cleanup_s3_location(s3bucket + schema_name, region) yield pass @@ -131,7 +124,7 @@ def models(self): @pytest.fixture(scope='class', autouse=True) def cleanup(self): - cleanup_s3_location() + cleanup_s3_location(s3bucket + schema_name, region) yield # test_ephemeral with refresh table @@ -184,7 +177,7 @@ def unique_schema(request, prefix) -> str: class TestIncrementalGlue(BaseIncremental): @pytest.fixture(scope='class', autouse=True) def cleanup(self): - cleanup_s3_location() + cleanup_s3_location(s3bucket + schema_name, region) yield @pytest.fixture(scope="class") @@ -250,7 +243,7 @@ def unique_schema(request, prefix) -> str: @pytest.fixture(scope='class', autouse=True) def cleanup(self): - cleanup_s3_location() + cleanup_s3_location(s3bucket + schema_name, region) yield def test_generic_tests(self, project): @@ -291,33 +284,3 @@ def test_generic_tests(self, project): #class TestSnapshotTimestampGlue(BaseSnapshotTimestamp): # pass - -class S3Url(object): - def __init__(self, url): - self._parsed = urlparse(url, allow_fragments=False) - - @property - def bucket(self): - return self._parsed.netloc - - @property - def key(self): - if self._parsed.query: - return self._parsed.path.lstrip("/") + "?" + self._parsed.query - else: - return self._parsed.path.lstrip("/") - - @property - def url(self): - return self._parsed.geturl() - - def delete_all_keys_v2(self, client): - bucket = self.bucket - prefix = self.key - - for response in client.get_paginator('list_objects_v2').paginate(Bucket=bucket, Prefix=prefix): - if 'Contents' not in response: - continue - for content in response['Contents']: - print("Deleting: s3://" + bucket + "/" + content['Key']) - client.delete_object(Bucket=bucket, Key=content['Key']) diff --git a/tests/util.py b/tests/util.py index faef0108..11d4528e 100644 --- a/tests/util.py +++ b/tests/util.py @@ -1,4 +1,6 @@ import os +import boto3 +from urllib.parse import urlparse from dbt.config.project import PartialProject @@ -110,3 +112,38 @@ def get_s3_location(): def get_role_arn(): return os.environ.get("DBT_GLUE_ROLE_ARN", f"arn:aws:iam::{get_account_id()}:role/GlueInteractiveSessionRole") + +def cleanup_s3_location(path, region): + client = boto3.client("s3", region_name=region) + S3Url(path).delete_all_keys_v2(client) + + +class S3Url(object): + def __init__(self, url): + self._parsed = urlparse(url, allow_fragments=False) + + @property + def bucket(self): + return self._parsed.netloc + + @property + def key(self): + if self._parsed.query: + return self._parsed.path.lstrip("/") + "?" + self._parsed.query + else: + return self._parsed.path.lstrip("/") + + @property + def url(self): + return self._parsed.geturl() + + def delete_all_keys_v2(self, client): + bucket = self.bucket + prefix = self.key + + for response in client.get_paginator('list_objects_v2').paginate(Bucket=bucket, Prefix=prefix): + if 'Contents' not in response: + continue + for content in response['Contents']: + print("Deleting: s3://" + bucket + "/" + content['Key']) + client.delete_object(Bucket=bucket, Key=content['Key']) From befb4d19c0a081dd78df3616cb444e5afa0033d8 Mon Sep 17 00:00:00 2001 From: Noritaka Sekiyama Date: Fri, 10 Nov 2023 16:38:31 +0900 Subject: [PATCH 3/5] Renamed util.py for unit tests --- tests/unit/test_adapter.py | 2 +- tests/unit/{utils.py => util.py} | 0 2 files changed, 1 insertion(+), 1 deletion(-) rename tests/unit/{utils.py => util.py} (100%) diff --git a/tests/unit/test_adapter.py b/tests/unit/test_adapter.py index 5032d57f..d161b9a9 100644 --- a/tests/unit/test_adapter.py +++ b/tests/unit/test_adapter.py @@ -9,7 +9,7 @@ from dbt.adapters.glue import GlueAdapter from dbt.adapters.glue.relation import SparkRelation from tests.util import config_from_parts_or_dicts -from .utils import MockAWSService +from .util import MockAWSService class TestGlueAdapter(unittest.TestCase): diff --git a/tests/unit/utils.py b/tests/unit/util.py similarity index 100% rename from tests/unit/utils.py rename to tests/unit/util.py From 1317e89f2ae559713b1d331ea73376af5180d64b Mon Sep 17 00:00:00 2001 From: Noritaka Sekiyama Date: Fri, 10 Nov 2023 17:52:18 +0900 Subject: [PATCH 4/5] Fix unit test --- dev-requirements.txt | 1 + tests/functional/adapter/test_constraints.py | 0 tests/unit/test_adapter.py | 8 ++++---- 3 files changed, 5 insertions(+), 4 deletions(-) delete mode 100644 tests/functional/adapter/test_constraints.py diff --git a/dev-requirements.txt b/dev-requirements.txt index 2027bd1d..72bb4723 100644 --- a/dev-requirements.txt +++ b/dev-requirements.txt @@ -25,6 +25,7 @@ black==23.11.0 waiter boto3 moto~=4.2.7 +pyparsing dbt-core~=1.7.1 dbt-spark~=1.7.1 diff --git a/tests/functional/adapter/test_constraints.py b/tests/functional/adapter/test_constraints.py deleted file mode 100644 index e69de29b..00000000 diff --git a/tests/unit/test_adapter.py b/tests/unit/test_adapter.py index d161b9a9..833a2a50 100644 --- a/tests/unit/test_adapter.py +++ b/tests/unit/test_adapter.py @@ -36,8 +36,8 @@ def setUp(self): "region": "us-east-1", "workers": 2, "worker_type": "G.1X", - "schema": "dbt_functional_test_01", - "database": "dbt_functional_test_01", + "schema": "dbt_unit_test_01", + "database": "dbt_unit_test_01", } }, "target": "test", @@ -59,7 +59,7 @@ def test_glue_connection(self): self.assertEqual(connection.state, "open") self.assertEqual(connection.type, "glue") - self.assertEqual(connection.credentials.schema, "dbt_functional_test_01") + self.assertEqual(connection.credentials.schema, "dbt_unit_test_01") self.assertIsNotNone(connection.handle) @@ -87,7 +87,7 @@ def test_hudi_merge_table(self): config = self._get_config() adapter = GlueAdapter(config) target_relation = SparkRelation.create( - schema="dbt_functional_test_01", + schema="dbt_unit_test_01", name="test_hudi_merge_table", ) with mock.patch("dbt.adapters.glue.connections.open"): From c6ed306caa6c99fd277d751c4c61a9f844538e19 Mon Sep 17 00:00:00 2001 From: Noritaka Sekiyama Date: Fri, 10 Nov 2023 18:46:12 +0900 Subject: [PATCH 5/5] Fixed test_generic_tests --- tests/functional/adapter/test_basic.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/functional/adapter/test_basic.py b/tests/functional/adapter/test_basic.py index 74adf164..8859bf20 100644 --- a/tests/functional/adapter/test_basic.py +++ b/tests/functional/adapter/test_basic.py @@ -251,8 +251,10 @@ def test_generic_tests(self, project): results = run_dbt(["seed"]) relation = relation_from_name(project.adapter, "base") + relation_table_model = relation_from_name(project.adapter, "table_model") # run refresh table to disable the previous parquet file paths project.run_sql(f"refresh table {relation}") + project.run_sql(f"refresh table {relation_table_model}") # test command selecting base model results = run_dbt(["test", "-m", "base"])