From c4156812488ba28e62c1261f1f0821aa659eb9e2 Mon Sep 17 00:00:00 2001 From: Daniel Messias Date: Tue, 1 Nov 2022 10:06:05 +0000 Subject: [PATCH] [BUGFIX] Add support for partition fields of type timestamp (#6) * fix glue partition handling (#2) * Add support for partition fields of type timestamp Co-authored-by: nicor88 <6278547+nicor88@users.noreply.github.com> Co-authored-by: yusuf.mahtab --- .github/workflows/python-publish.yml | 2 +- CHANGELOG.md | 2 ++ dbt/adapters/athena/impl.py | 21 +++++++++++-------- .../models/incremental/helpers.sql | 2 ++ setup.py | 2 +- 5 files changed, 18 insertions(+), 11 deletions(-) create mode 100644 CHANGELOG.md diff --git a/.github/workflows/python-publish.yml b/.github/workflows/python-publish.yml index 130b2381..fc95445a 100644 --- a/.github/workflows/python-publish.yml +++ b/.github/workflows/python-publish.yml @@ -3,7 +3,7 @@ name: Upload Python Package on: push: branches: - - 'master' + - 'main' permissions: contents: read diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 00000000..3bc6def4 --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,2 @@ +## v1.0.3 +* Fix issue on fetching partitions from glue, using pagination diff --git a/dbt/adapters/athena/impl.py b/dbt/adapters/athena/impl.py index 165c77b2..c43d3f80 100755 --- a/dbt/adapters/athena/impl.py +++ b/dbt/adapters/athena/impl.py @@ -63,16 +63,19 @@ def clean_up_partitions( with boto3_client_lock: glue_client = boto3.client('glue', region_name=client.region_name) s3_resource = boto3.resource('s3', region_name=client.region_name) - partitions = glue_client.get_partitions( - # CatalogId='123456789012', # Need to make this configurable if it is different from default AWS Account ID - DatabaseName=database_name, - TableName=table_name, - Expression=where_condition - ) - p = re.compile('s3://([^/]*)/(.*)') - for partition in partitions["Partitions"]: + paginator = glue_client.get_paginator("get_partitions") + partition_params = { + "DatabaseName": database_name, + "TableName": table_name, + "Expression": where_condition, + "ExcludeColumnSchema": True, + } + partition_pg = paginator.paginate(**partition_params) + partitions = partition_pg.build_full_result().get('Partitions') + s3_rg = re.compile('s3://([^/]*)/(.*)') + for partition in partitions: logger.debug("Deleting objects for partition '{}' at '{}'", partition["Values"], partition["StorageDescriptor"]["Location"]) - m = p.match(partition["StorageDescriptor"]["Location"]) + m = s3_rg.match(partition["StorageDescriptor"]["Location"]) if m is not None: bucket_name = m.group(1) prefix = m.group(2) diff --git a/dbt/include/athena/macros/materializations/models/incremental/helpers.sql b/dbt/include/athena/macros/materializations/models/incremental/helpers.sql index 3f6b1f59..5ec0cdf0 100644 --- a/dbt/include/athena/macros/materializations/models/incremental/helpers.sql +++ b/dbt/include/athena/macros/materializations/models/incremental/helpers.sql @@ -40,6 +40,8 @@ {%- set value = "'" + col + "'" -%} {%- elif column_type == 'date' -%} {%- set value = "'" + col|string + "'" -%} + {%- elif column_type == 'timestamp' -%} + {%- set value = "'" + col|string + "'" -%} {%- else -%} {%- do exceptions.raise_compiler_error('Need to add support for column type ' + column_type) -%} {%- endif -%} diff --git a/setup.py b/setup.py index 53b8f281..2729b804 100644 --- a/setup.py +++ b/setup.py @@ -11,7 +11,7 @@ package_name = "dbt-athena-community" dbt_version = "1.0" -package_version = "1.0.2" +package_version = "1.0.3" description = """The athena adapter plugin for dbt (data build tool)""" if not package_version.startswith(dbt_version):