From 227a9794a79f1779842c9abe5946685dbc7b5468 Mon Sep 17 00:00:00 2001 From: Astralidea Date: Tue, 17 Oct 2023 17:04:40 +0800 Subject: [PATCH] add adapter docs for starrocks --- website/docs/docs/community-adapters.md | 12 +- .../connect-data-platform/starrocks-setup.md | 103 ++++++++++++++++ .../resource-configs/starrocks-configs.md | 116 ++++++++++++++++++ website/sidebars.js | 2 + 4 files changed, 227 insertions(+), 6 deletions(-) create mode 100644 website/docs/docs/core/connect-data-platform/starrocks-setup.md create mode 100644 website/docs/reference/resource-configs/starrocks-configs.md diff --git a/website/docs/docs/community-adapters.md b/website/docs/docs/community-adapters.md index 87d1bd4981e..444ea0e04b4 100644 --- a/website/docs/docs/community-adapters.md +++ b/website/docs/docs/community-adapters.md @@ -11,10 +11,10 @@ Community adapters are adapter plugins contributed and maintained by members of | [Clickhouse](/docs/core/connect-data-platform/clickhouse-setup) | [Hive](/docs/core/connect-data-platform/hive-setup) | [Rockset](/docs/core/connect-data-platform/rockset-setup) | | [IBM DB2](/docs/core/connect-data-platform/ibmdb2-setup) | [Impala](/docs/core/connect-data-platform/impala-setup) | [SingleStore](/docs/core/connect-data-platform/singlestore-setup) | | [Doris & SelectDB](/docs/core/connect-data-platform/doris-setup) | [Infer](/docs/core/connect-data-platform/infer-setup) | [SQLite](/docs/core/connect-data-platform/sqlite-setup) | -| [DuckDB](/docs/core/connect-data-platform/duckdb-setup) | [iomete](/docs/core/connect-data-platform/iomete-setup) | [SQL Server & Azure SQL](/docs/core/connect-data-platform/mssql-setup) | -| [Dremio](/docs/core/connect-data-platform/dremio-setup) | [Layer](/docs/core/connect-data-platform/layer-setup) | [Teradata](/docs/core/connect-data-platform/teradata-setup) | -| [Exasol Analytics](/docs/core/connect-data-platform/exasol-setup) | [Materialize](/docs/core/connect-data-platform/materialize-setup) | 
[TiDB](/docs/core/connect-data-platform/tidb-setup) | -| [Firebolt](/docs/core/connect-data-platform/firebolt-setup) | [MindsDB](/docs/core/connect-data-platform/mindsdb-setup) | [Vertica](/docs/core/connect-data-platform/vertica-setup) | -| [AWS Glue](/docs/core/connect-data-platform/glue-setup) | [MySQL](/docs/core/connect-data-platform/mysql-setup)| [Upsolver](/docs/core/connect-data-platform/upsolver-setup) | -| [Databend Cloud](/docs/core/connect-data-platform/databend-setup) | [fal - Python models](/docs/core/connect-data-platform/fal-setup) | | +| [Starrocks](/docs/core/connect-data-platform/starrocks-setup) | [DuckDB](/docs/core/connect-data-platform/duckdb-setup) | [iomete](/docs/core/connect-data-platform/iomete-setup) +| [SQL Server & Azure SQL](/docs/core/connect-data-platform/mssql-setup) | [Dremio](/docs/core/connect-data-platform/dremio-setup) | [Layer](/docs/core/connect-data-platform/layer-setup) +| [Teradata](/docs/core/connect-data-platform/teradata-setup) | [Exasol Analytics](/docs/core/connect-data-platform/exasol-setup) | [Materialize](/docs/core/connect-data-platform/materialize-setup) +| [TiDB](/docs/core/connect-data-platform/tidb-setup) | [Firebolt](/docs/core/connect-data-platform/firebolt-setup) | [MindsDB](/docs/core/connect-data-platform/mindsdb-setup) +| [Vertica](/docs/core/connect-data-platform/vertica-setup) | [AWS Glue](/docs/core/connect-data-platform/glue-setup) | [MySQL](/docs/core/connect-data-platform/mysql-setup) | +| [Upsolver](/docs/core/connect-data-platform/upsolver-setup) | [Databend Cloud](/docs/core/connect-data-platform/databend-setup) | [fal - Python models](/docs/core/connect-data-platform/fal-setup) | diff --git a/website/docs/docs/core/connect-data-platform/starrocks-setup.md b/website/docs/docs/core/connect-data-platform/starrocks-setup.md new file mode 100644 index 00000000000..e5c1abac037 --- /dev/null +++ b/website/docs/docs/core/connect-data-platform/starrocks-setup.md @@ -0,0 +1,103 @@ +--- +title: 
"Starrocks setup" +description: "Read this guide to learn about the Starrocks warehouse setup in dbt." +id: "starrocks-setup" +meta: + maintained_by: Starrocks + authors: Astralidea + github_repo: 'StarRocks/starrocks/tree/main/contrib/dbt-connector' + pypi_package: 'dbt-starrocks' + min_core_version: 'v1.6.2' + min_supported_version: 'Starrocks 2.5' + cloud_support: Not Supported + slack_channel_name: '#db-starrocks' + slack_channel_link: 'https://www.getdbt.com/community' + platform_name: 'Starrocks' + config_page: '/reference/resource-configs/starrocks-configs' +--- + +

Overview of {frontMatter.meta.pypi_package}

+ + + + +

Installing {frontMatter.meta.pypi_package}

+ +pip is the easiest way to install the adapter: + +pip install {frontMatter.meta.pypi_package} + +

Installing {frontMatter.meta.pypi_package} will also install dbt-core and any other dependencies.

+ +

Configuring {frontMatter.meta.pypi_package}

+ +

For {frontMatter.meta.platform_name}-specific configuration, please refer to {frontMatter.meta.platform_name} Configuration.

+ +

For further info, refer to the GitHub repository: {frontMatter.meta.github_repo}

+ + +## Authentication Methods + +### User / Password Authentication + +Starrocks can be configured using basic user/password authentication as shown below. + + + +```yaml +my-starrocks-db: + target: dev + outputs: + dev: + type: starrocks + host: localhost + port: 9030 + schema: analytics + + # User/password auth + username: your_starrocks_username + password: your_starrocks_password +``` + + + +#### Description of Profile Fields +| Option | Description | Required? | Example | +|----------|--------------------------------------------------------|-----------|--------------------------------| +| type | The specific adapter to use | Required | `starrocks` | +| host | The hostname to connect to | Required | `192.168.100.28` | +| port | The port to use | Required | `9030` | +| schema | Specify the schema (database) to build models into | Required | `analytics` | +| username | The username to use to connect to the server | Required | `dbt_admin` | +| password | The password to use for authenticating to the server | Required | `correct-horse-battery-staple` | +| version | Let Plugin try to go to a compatible starrocks version | Optional | `3.1.0` | + +## Supported features + +| Starrocks <= 2.5 | Starrocks 2.5 ~ 3.1 | Starrocks >= 3.1 | Feature | +|:----------------:|:--------------------:|:-----------------:|:---------------------------------:| +| ✅ | ✅ | ✅ | Table materialization | +| ✅ | ✅ | ✅ | View materialization | +| ❌ | ❌ | ✅ | Materialized View materialization | +| ❌ | ✅ | ✅ | Incremental materialization | +| ❌ | ✅ | ✅ | Primary Key Model | +| ✅ | ✅ | ✅ | Sources | +| ✅ | ✅ | ✅ | Custom data tests | +| ✅ | ✅ | ✅ | Docs generate | +| ❌ | ❌ | ❌ | Kafka | + +### Notice +1. When StarRocks Version < 2.5, `Create table as` can only set engine='OLAP' and table_type='DUPLICATE' +2. When StarRocks Version >= 2.5, `Create table as` supports table_type='PRIMARY' +3. 
When StarRocks Version < 3.1 distributed_by is required + +It is recommended to use the latest starrocks version and dbt-starrocks version for the best experience. \ No newline at end of file diff --git a/website/docs/reference/resource-configs/starrocks-configs.md b/website/docs/reference/resource-configs/starrocks-configs.md new file mode 100644 index 00000000000..093534515c6 --- /dev/null +++ b/website/docs/reference/resource-configs/starrocks-configs.md @@ -0,0 +1,116 @@ +--- +title: "Starrocks configurations" +id: "starrocks-configs" +description: "Starrocks Configurations - Read this in-depth guide to learn about configurations in dbt." +--- + +## Model Configuration + +A dbt model can be configured using the following syntax: + + + + + + + +```yaml +models: + : + materialized: table # table or view or materialized_view + keys: ['id', 'name', 'some_date'] + table_type: 'PRIMARY' # PRIMARY or DUPLICATE or UNIQUE + distributed_by: ['id'] + buckets: 3 # default 10 + partition_by: ['some_date'] + partition_by_init: ["PARTITION p1 VALUES [('1971-01-01 00:00:00'), ('1991-01-01 00:00:00')),PARTITION p1972 VALUES [('1991-01-01 00:00:00'), ('1999-01-01 00:00:00'))"] + properties: [{"replication_num":"1", "in_memory": "true"}] + refresh_method: 'async' # only for materialized views; defaults to manual +``` + + + + + + + +```yaml +models: + - name: + config: + materialized: table # table or view or materialized_view + keys: ['id', 'name', 'some_date'] + table_type: 'PRIMARY' # PRIMARY or DUPLICATE or UNIQUE + distributed_by: ['id'] + buckets: 3 # default 10 + partition_by: ['some_date'] + partition_by_init: ["PARTITION p1 VALUES [('1971-01-01 00:00:00'), ('1991-01-01 00:00:00')),PARTITION p1972 VALUES [('1991-01-01 00:00:00'), ('1999-01-01 00:00:00'))"] + properties: [{"replication_num":"1", "in_memory": "true"}] + refresh_method: 'async' # only for materialized views; defaults to manual +``` + + + + + + + +```jinja +{{ config( + materialized = 'table', + keys=['id',
'name', 'some_date'], + table_type='PRIMARY', + distributed_by=['id'], + buckets=3, + partition_by=['some_date'], + .... +) }} +``` + + + + +### Configuration Description + +| Option | Description | +|---------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `materialized` | How the model will be materialized into Starrocks. Supports view, table, incremental, ephemeral, and materialized_view. | +| `keys` | Which columns serve as keys. | +| `table_type` | Table type, supported are PRIMARY or DUPLICATE or UNIQUE. | +| `distributed_by` | Specifies the column of data distribution. If not specified, it defaults to random. | +| `buckets` | The bucket number in one partition. If not specified, it will be automatically inferred. | +| `partition_by` | The partition column list. | +| `partition_by_init` | The partition rule or some real partitions item. | +| `properties` | The table properties configuration of Starrocks. ([Starrocks table properties](https://docs.starrocks.io/en-us/latest/sql-reference/sql-statements/data-definition/CREATE_TABLE#properties)) | +| `refresh_method` | How to refresh materialized views. | + +## Read From Catalog +First you need to add this catalog to starrocks. The following is an example of hive. + +```sql +CREATE EXTERNAL CATALOG `hive_catalog` +PROPERTIES ( + "hive.metastore.uris" = "thrift://127.0.0.1:8087", + "type"="hive" +); +``` +How to add other types of catalogs can be found in the documentation. [Catalog Overview](https://docs.starrocks.io/en-us/latest/data_source/catalog/catalog_overview) Then write the sources.yaml file. 
+```yaml +sources: + - name: external_example + schema: hive_catalog.hive_db + tables: + - name: hive_table_name +``` +Finally, you can reference the external table using the `source` macro, as shown below: +```jinja +{{ source('external_example', 'hive_table_name') }} +``` \ No newline at end of file diff --git a/website/sidebars.js b/website/sidebars.js index b4da9d911d7..77827b03507 100644 --- a/website/sidebars.js +++ b/website/sidebars.js @@ -225,6 +225,7 @@ const sidebarSettings = { "docs/core/connect-data-platform/fal-setup", "docs/core/connect-data-platform/decodable-setup", "docs/core/connect-data-platform/upsolver-setup", + "docs/core/connect-data-platform/starrocks-setup", ], }, ], @@ -670,6 +671,7 @@ const sidebarSettings = { "reference/resource-configs/fal-configs", "reference/resource-configs/oracle-configs", "reference/resource-configs/upsolver-configs", + "reference/resource-configs/starrocks-configs", ], }, {