From 6d195076d5577973e9ff208ba47b35b57363571a Mon Sep 17 00:00:00 2001 From: nicor88 <6278547+nicor88@users.noreply.github.com> Date: Tue, 25 Oct 2022 21:19:32 +0200 Subject: [PATCH] fix glue partition handling (#2) --- .github/workflows/python-publish.yml | 2 +- CHANGELOG.md | 2 ++ dbt/adapters/athena/impl.py | 21 ++++++++++++--------- setup.py | 2 +- 4 files changed, 16 insertions(+), 11 deletions(-) create mode 100644 CHANGELOG.md diff --git a/.github/workflows/python-publish.yml b/.github/workflows/python-publish.yml index 130b2381..fc95445a 100644 --- a/.github/workflows/python-publish.yml +++ b/.github/workflows/python-publish.yml @@ -3,7 +3,7 @@ name: Upload Python Package on: push: branches: - - 'master' + - 'main' permissions: contents: read diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 00000000..3bc6def4 --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,2 @@ +## v1.0.3 +* Fix issue on fetching partitions from glue, using pagination diff --git a/dbt/adapters/athena/impl.py b/dbt/adapters/athena/impl.py index 779ea340..292e6e8d 100755 --- a/dbt/adapters/athena/impl.py +++ b/dbt/adapters/athena/impl.py @@ -64,16 +64,19 @@ def clean_up_partitions( with boto3_client_lock: glue_client = boto3.client('glue', region_name=client.region_name) s3_resource = boto3.resource('s3', region_name=client.region_name) - partitions = glue_client.get_partitions( - # CatalogId='123456789012', # Need to make this configurable if it is different from default AWS Account ID - DatabaseName=database_name, - TableName=table_name, - Expression=where_condition - ) - p = re.compile('s3://([^/]*)/(.*)') - for partition in partitions["Partitions"]: + paginator = glue_client.get_paginator("get_partitions") + partition_params = { + "DatabaseName": database_name, + "TableName": table_name, + "Expression": where_condition, + "ExcludeColumnSchema": True, + } + partition_pg = paginator.paginate(**partition_params) + partitions = partition_pg.build_full_result().get('Partitions') + s3_rg = re.compile('s3://([^/]*)/(.*)') + for partition in partitions: logger.debug("Deleting objects for partition '{}' at '{}'", partition["Values"], partition["StorageDescriptor"]["Location"]) - m = p.match(partition["StorageDescriptor"]["Location"]) + m = s3_rg.match(partition["StorageDescriptor"]["Location"]) if m is not None: bucket_name = m.group(1) prefix = m.group(2) diff --git a/setup.py b/setup.py index 53b8f281..2729b804 100644 --- a/setup.py +++ b/setup.py @@ -11,7 +11,7 @@ package_name = "dbt-athena-community" dbt_version = "1.0" -package_version = "1.0.2" +package_version = "1.0.3" description = """The athena adapter plugin for dbt (data build tool)""" if not package_version.startswith(dbt_version):