From 70cf9b788d67d0db1435a48a0ce94e1bc69c47fe Mon Sep 17 00:00:00 2001 From: ericbaumann <24817191+ericbaumann@users.noreply.github.com> Date: Thu, 17 Aug 2023 17:24:57 -0400 Subject: [PATCH] update dbt-databricks docs --- .../connect-data-platform/databricks-setup.md | 69 +++++++++++++++++-- 1 file changed, 63 insertions(+), 6 deletions(-) diff --git a/website/docs/docs/core/connect-data-platform/databricks-setup.md b/website/docs/docs/core/connect-data-platform/databricks-setup.md index 0d24a3b04aa..9c8eecb7c97 100644 --- a/website/docs/docs/core/connect-data-platform/databricks-setup.md +++ b/website/docs/docs/core/connect-data-platform/databricks-setup.md @@ -31,8 +31,6 @@ meta:
  • Minimum data platform version: {frontMatter.meta.min_supported_version}
  • -## Installation and Distribution -

    Installing {frontMatter.meta.pypi_package}

    @@ -56,9 +54,51 @@ pip is the easiest way to install the adapter: - Use of Delta Lake for all models out of the box - SQL macros that are optimized to run with [Photon](https://docs.databricks.com/runtime/photon.html) -### Set up a Databricks Target +### Connecting to Databricks + +To connect to a data platform with dbt Core, create appropriate _profile_ and _target_ YAML keys/values in the `profiles.yml` configuration file for your Databricks SQL Warehouse or all-purpose cluster. This dbt YAML file lives in the `.dbt/` directory of your user/home directory. For more information, refer to [Connection profiles](/docs/core/connect-data-platform/connection-profiles) and [profiles.yml](/docs/core/connect-data-platform/profiles.yml). + +`dbt-databricks` can connect to Databricks SQL Warehouses and all-purpose clusters. Databricks SQL Warehouses is the recommended way to get started with Databricks. + + +See the [Databricks documentation](https://docs.databricks.com/dev-tools/dbt.html#) on how +to obtain the credentials for configuring your profile. + +## Host parameters + +The following profile fields are always required. + +| Field | Example | Description | +| --------- | ------- | ----------- | +| `host` | `yourorg.databrickshost.com` | The hostname of your cluster.

    Don't include the `http://` or `https://` prefix. | +| `http_path` | `/sql/your/http/path` | The HTTP path to your SQL Warehouse or all-purpose cluster. | +| `schema` | `my_schema` | The name of a schema within your cluster's catalog.

    It's _not recommended_ to use schema names that have upper case or mixed case letters. | + +## Authentication parameters + +The `dbt-databricks` adapter supports both token-based authentication and OAuth client authentication. + +| Field | Example | Description | +| --------- | ------- | ----------- | +| `token` | `dapiXXXXXXXXXXXXXXXXXXXXXXX` | The Personal Access Token (PAT) to connect to Databricks. This is required if you are using token-based authentication. | +| `client_id` | `OAuth-Client-ID` | The client ID for your Databricks OAuth application. This is required if you are using OAuth-based authentication. | +| `client_secret` | `XXXXXXXXXXXXXXXXXXXXXXXXXXX` | The client secret for your Databricks OAuth application. This is required if you are using OAuth-based authentication. | +| `auth_type` | `oauth` | This is required if you are using OAuth-based authentication. You do not need to include this value if you are using token-based authentication. | + + +## Additional parameters + +The following profile fields are optional to set up. They let you configure your cluster's session and dbt for your connection. + + +| Profile field | Example | Description | +| ----------------------------- | -------------------------------- | ----------------------------------------------------------------------------------------------------------- | +| `threads` | `8` | How many threads dbt should use (default is `1`) | +| `connect_retries` | `3` | How many times dbt should retry the connection to Databricks (default is `1`) | +| `connect_timeout` | `1000` | How many seconds before the connection to Databricks should time out (default behavior is no timeout) | +| `session_properties` | `ansi_mode: true` | Sets Databricks session properties used in the connection. Execute `SET -v` to see available options | -dbt-databricks can connect to the Databricks SQL Warehouses and all-purpose clusters. Databricks SQL Warehouses is the recommended way to get started with Databricks. 
+#### Example profiles.yml for token-based authentication @@ -78,10 +118,27 @@ your_profile_name: -See the [Databricks documentation](https://docs.databricks.com/dev-tools/dbt.html#) on how -to obtain the credentials for configuring your profile. +#### Example profiles.yml for OAuth-based authentication + +```yaml +your_profile_name: + target: dev + outputs: + dev: + type: databricks + catalog: [optional catalog name if you are using Unity Catalog] + schema: [schema name] + host: [yourorg.databrickshost.com] + http_path: [/sql/your/http/path] + auth_type: oauth + client_id: [OAuth-Client-ID] # The ID of your OAuth application + client_secret: [XXXXXXXXXXXXXXXXXXXXXXXXXXX] # OAuth client secret + threads: [1 or more] # optional, default 1 +``` + + ## Supported Functionality