Add AWS Athena plugin #133

Closed
14 changes: 13 additions & 1 deletion .circleci/config.yml
@@ -5,7 +5,18 @@ orbs:
  azure-cli: circleci/azure-cli@<version>

jobs:

  integration-athena:
    docker:
      - image: cimg/python:3.9.9
    steps:
      - checkout
      - run:
          name: "Run Tests - Athena"
          command: ./run_test.sh athena
      - store_artifacts:
          path: ./logs

  integration-redshift:
    docker:
      - image: cimg/python:3.9.9
@@ -111,6 +122,7 @@ workflows:
      - integration-snowflake
      - integration-bigquery
      - integration-databricks
      - integration-athena
      #- integration-synapse
      #- integration-azuresql:
      #    requires:
2 changes: 2 additions & 0 deletions README.md
@@ -14,6 +14,7 @@ This package provides:
* Spark
* Synapse
* Azure SQL
* AWS Athena

![sample docs](etc/sample_docs.png)

@@ -56,6 +57,7 @@ The macros assume that you:
- an accessible set of files (Spark)
2. Have the appropriate permissions to create tables using that scaffolding
3. Have already created the database/project and/or schema/dataset in which dbt will create external tables (or snowpiped tables)
4. Have set the top-level key `query-comment:` to an empty value in your `dbt_project.yml` file (Athena only)
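For example, mirroring the integration test project in this PR, leaving the key empty disables dbt's default query comment, which is otherwise injected as a `/* ... */` block comment that Athena's DML parser rejects:

```yaml
# dbt_project.yml
# An empty value disables the default /* ... */ block comment header,
# which Athena DML cannot parse.
query-comment:
```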

## Spec

9 changes: 9 additions & 0 deletions integration_tests/ci/sample.profiles.yml
@@ -10,6 +10,15 @@ integration_tests:
  target: postgres
  outputs:

    athena:
      type: athena
      database: "{{ env_var('ATHENA_TEST_DBNAME') }}"
      region_name: "{{ env_var('AWS_REGION') }}"
      s3_staging_dir: "s3://{{ env_var('ATHENA_TEST_BUCKET') }}"
      work_group: "{{ env_var('ATHENA_TEST_WORKGROUP') }}"
      schema: dbt_external_tables_integration_tests_athena
      threads: 1

    redshift:
      type: redshift
      host: "{{ env_var('REDSHIFT_TEST_HOST') }}"
11 changes: 11 additions & 0 deletions integration_tests/dbt_project.yml
@@ -24,9 +24,18 @@ dispatch:
seeds:
  +quote_columns: false

# FIXME: query-comment must be disabled for Athena to work, because /* block comments are unsupported in Athena DML.
# Removing this line will result in a Runtime Error during the integration test
# `2 of 5 (2) create external table dbt.people_csv_partitioned ...`. The error is
# "FAILED: ParseException line 1:0 cannot recognize input near '/' '*' '{'".
# Is there a better way around this?
query-comment:
Comment on lines +31 to +32

Collaborator:

Yes, this is something the adapter can reimplement! That's where the logic lives, with just this sort of reason in mind. The base implementation is defined on the core adapter interface: _QueryComment.add() in core.dbt.adapters.base.query_headers

The right place for this is probably a new issue in the dbt-athena adapter.

Reply:

nice, thanks for the pointer! I've opened up a PR in the dbt-athena adapter repo.

Reply:

@daniel-cortez-stevenson Could you please add a link to your PR? I cannot find it and need inspiration for how to solve a similar issue.

Reply:

nvm. found it: Tomme/dbt-athena#64

What do you think of it?


sources:
  dbt_external_tables_integration_tests:
    plugins:
      athena:
        +enabled: "{{ target.type == 'athena' }}"
      redshift:
        +enabled: "{{ target.type == 'redshift' }}"
      snowflake:
@@ -43,6 +52,8 @@ sources:
tests:
  dbt_external_tables_integration_tests:
    plugins:
      athena:
        +enabled: "{{ target.type == 'athena' }}"
      redshift:
        +enabled: "{{ target.type == 'redshift' }}"
      snowflake:
116 changes: 116 additions & 0 deletions integration_tests/models/plugins/athena/athena_external.yml
@@ -0,0 +1,116 @@
version: 2

sources:
  - name: athena_external
    schema: "{{ target.schema }}"
    tables:
      - name: people_csv_unpartitioned
        external: &csv-people
          location: "s3://dbt-external-tables-testing/csv/"
          row_format: serde 'org.apache.hadoop.hive.serde2.OpenCSVSerde'
          table_properties: "('skip.header.line.count'='1')"
        columns: &cols-of-the-people
          - name: id
            data_type: int
          - name: first_name
            data_type: varchar(64)
          - name: last_name
            data_type: varchar(64)
          - name: email
            data_type: varchar(64)
        tests: &equal-to-the-people
          - dbt_utils.equality:
              compare_model: ref('people')
              compare_columns:
                - id
                - first_name
                - last_name
                - email

      - name: people_csv_partitioned
        external:
          <<: *csv-people
          partitions: &parts-of-the-people
            - name: section
              data_type: varchar(1)
              vals: ['a','b','c','d']
              path_macro: dbt_external_tables.key_value
        columns: *cols-of-the-people
        tests: *equal-to-the-people

      # ensure that all partitions are created
      - name: people_csv_multipartitioned
        external:
          <<: *csv-people
          location: "s3://dbt-external-tables-testing/"
          partitions:
            - name: file_format
              data_type: varchar(4)
              vals: ['csv', 'json']
              path_macro: dbt_external_tables.value_only
            - name: section
              data_type: varchar(1)
              vals: ['a','b','c','d']
              path_macro: dbt_external_tables.key_value
            - name: some_date
              data_type: date
              vals:
                macro: dbt.dates_in_range
                args:
                  start_date_str: '2020-01-01'
                  end_date_str: '2020-02-01'
                  in_fmt: "%Y-%m-%d"
                  out_fmt: "%Y-%m-%d"
              path_macro: dbt_external_tables.year_month_day
            - name: file_name
              data_type: varchar(10)
              vals: ['people', 'not_people']
              path_macro: dbt_external_tables.value_only
        columns: *cols-of-the-people

      - name: people_csv_multipartitioned_hive_compatible
        external:
          <<: *csv-people
          hive_compatible_partitions: true
          location: "s3://dbt-external-tables-testing/"
          partitions:
            - name: file_format
              data_type: varchar(4)
              vals: ['csv', 'json']
              path_macro: dbt_external_tables.value_only
            - name: section
              data_type: varchar(1)
              vals: ['a','b','c','d']
              path_macro: dbt_external_tables.key_value
            - name: some_date
              data_type: date
              vals:
                macro: dbt.dates_in_range
                args:
                  start_date_str: '2020-01-01'
                  end_date_str: '2020-02-01'
                  in_fmt: "%Y-%m-%d"
                  out_fmt: "%Y-%m-%d"
              path_macro: dbt_external_tables.year_month_day
            - name: file_name
              data_type: varchar(10)
              vals: ['people', 'not_people']
              path_macro: dbt_external_tables.value_only
        columns: *cols-of-the-people

      - name: people_json_unpartitioned
        external: &json-people
          location: "s3://dbt-external-tables-testing/json/"
          row_format: "serde 'org.openx.data.jsonserde.JsonSerDe'
            with serdeproperties (
              'strip.outer.array'='false'
            )"
        columns: *cols-of-the-people
        tests: *equal-to-the-people

      - name: people_json_partitioned
        external:
          <<: *json-people
          partitions: *parts-of-the-people
        columns: *cols-of-the-people
        tests: *equal-to-the-people
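
A note on the YAML anchors used throughout this spec: `&name` defines a reusable node, `*name` references it, and `<<:` merges the referenced mapping's keys into the current one. A minimal sketch of the pattern as it appears above (abbreviated to the `location` key for illustration):

```yaml
- name: people_csv_unpartitioned
  external: &csv-people              # anchor: make this mapping reusable
    location: "s3://dbt-external-tables-testing/csv/"

- name: people_csv_partitioned
  external:
    <<: *csv-people                  # merge key: inherit every key from the
    partitions:                      # anchor, then add partitions locally
      - name: section
```

This keeps the table definitions in sync: a change to `&csv-people` propagates to every table that merges it.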