From 77910e43082c6919e8ec23b3db15f475f91f7a2b Mon Sep 17 00:00:00 2001
From: winnie <91998347+gwenwindflower@users.noreply.github.com>
Date: Mon, 4 Mar 2024 19:26:54 -0600
Subject: [PATCH] Add testing framework, CI, and make post-commands optional (#6)

* Spike out CI workflow yml
* Spike out pytest and fixtures
* Fix duplication of slug logic in copier.yml
* Quote new jinja project_slug in copier.yml
* Restore README bit that was lost again
* Expand README with CLI recs and learning resources
* Make disclaimer about me working at dbt a footnote
* Fix footnote disclaimer formatting
* Fix wording of disclaimer footnote
* Amended: profiles integration test working; rm'd test-build from git
* Integration tests working for BQ, SF, DDB
* Expand README re command optionality
* Move dev-requirements files to project root, update CI.yml
* Capitalize CI in workflow name
* Expand template exclude list to include dev-reqs
* Compile requirements before installing in CI
* Compile requirements before installing in CI corrected to dev-requirements
* Fix copier.yml from wonky merge
* Ensure tests are running copier state from HEAD
* Simplify CI matrix to py 3.10 macos for now
* Make 3.10 a string in ci.yml
---
 .github/workflows/ci.yml                    |  30 ++++
 .gitignore                                  |   4 +
 README.md                                   | 152 ++++++++++++------
 copier.yml                                  |  60 +++++--
 dev-requirements.in                         |   3 +
 dev-requirements.txt                        |  62 +++++++
 profiles.yml.jinja                          |  58 +++----
 .../test-expectations/bigquery_profile.yml  |  10 ++
 .../test-expectations/duckdb_profile.yml    |  10 ++
 .../test-expectations/snowflake_profile.yml |  13 ++
 .../test_profile_output_is_correct.py       |  70 ++++++++
 11 files changed, 371 insertions(+), 101 deletions(-)
 create mode 100644 .github/workflows/ci.yml
 create mode 100644 .gitignore
 create mode 100644 dev-requirements.in
 create mode 100644 dev-requirements.txt
 create mode 100644 template-integration-tests/test-expectations/bigquery_profile.yml
 create mode 100644 template-integration-tests/test-expectations/duckdb_profile.yml
 create mode 100644 template-integration-tests/test-expectations/snowflake_profile.yml
 create mode 100644 template-integration-tests/test_profile_output_is_correct.py

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
new file mode 100644
index 0000000..e91a610
--- /dev/null
+++ b/.github/workflows/ci.yml
@@ -0,0 +1,30 @@
+name: CI
+
+on:
+  pull_request:
+    branches: [main]
+
+jobs:
+  ci:
+    strategy:
+      matrix:
+        python-version: ["3.10"]
+        os: [macos-latest]
+    runs-on: ${{ matrix.os }}
+    steps:
+      - name: Checkout PR branch
+        uses: actions/checkout@v4.1.1
+
+      - name: Setup Python
+        uses: actions/setup-python@v5.0.0
+        with:
+          python-version: ${{ matrix.python-version }}
+
+      - name: Install uv and requirements
+        run: |
+          python -m pip install uv
+          uv pip compile dev-requirements.in -o dev-requirements.txt
+          uv pip install -r dev-requirements.txt --system
+
+      - name: Run integration tests
+        run: pytest template-integration-tests

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..04b6975
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,4 @@
+.venv
+__pycache__
+template-integration-tests/test-build
+.pytest_cache

diff --git a/README.md b/README.md
index f859e92..91bca61 100644
--- a/README.md
+++ b/README.md
@@ -13,6 +13,8 @@ You will need `python3`, [`pipx`](https://github.com/pypa/pipx), and `git` insta
 - [Non-goals](#non-goals)
 - [Usage](#usage)
 - [Tips](#tips)
+  - [Learning resources](#learning-resources)
+  - [Improving the command line experience](#improving-the-command-line-experience)

 ## Features

@@ -64,7
+66,7 @@ Before embarking on this quick journey: if your data platform has a CLI tool tha

 1. Install `copier` if you haven't already:

-   ```shell
+   ```bash
    pipx install copier
    ```

@@ -73,25 +75,50 @@ Before embarking on this quick journey: if your data platform has a CLI tool tha

 2. Create a new dbt project from this template:

-   ```shell
-   copier gh:gwenwindflower/copier-dbt <path-to-project-directory> --trust
+   ```bash
+   # read below re the --trust flag
+   copier gh:gwenwindflower/copier-dbt <path-to-project-directory> --trust
    ```

   - `gh:` tells copier to use a GitHub repository as the source for the template
-  - The directory you specify is where the new project will be created, don't create it beforehand
-  - `copier` will run a series of commands to set up your project after it templates everything. These are listed in the `copier.yml` at the bottom in the `_tasks` list. I highly encourage you to look through these before and make sure you really do trust and understand them before using the `--trust` flag above that will allow them to run. These commands are very straightforward and standard, but letting somebody's code run commands on your machine should always be taken seriously. In brief they will do the following (but seriously go look at the file):
-    - Create and activate a virtual environment for the project in your newly templated project directory
-    - Install the project's dependencies
-    - Put the contents of the `profiles.yml` file in the correct place in your home directory then remove the file from your project for security
-    - Initialize a new git repo in your project and make an initial commit
-    - Install the pre-commit hooks in your project
-  - If you feel more comfortable with it, you can delete the tasks section, skip the `--trust` flag, and run the commands manually after the project is created
-3. Follow the prompts to configure your project, depending on your answers to certain prompts, different prompts may appear or disappear (e.g. if you choose your `data_warehouse` as `bigquery` you'll get a different set of questions to configure the `profiles.yml`).
+  - The directory you specify is where the new project will be created. You don't need to create it beforehand, but do make sure there isn't already a directory with the same name containing work you don't want to mess up
+  - 🚨 `--trust` will allow copier to _**optionally**_ run a series of commands to set up your project after it templates everything. These are listed in the `copier.yml` at the bottom in the `_tasks` list, and they're detailed below. I highly encourage you to look through them and make sure you really do trust and understand them before using the `--trust` flag that allows them to run. These commands are very straightforward and standard (very similar to using a project's `make` commands, `dbt init`, or other build scripts), but letting somebody's code run commands on your machine should always be considered carefully. They are chunked up logically into sections which can be **opted into**; they all default to `False` (no commands run, just templating).
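+
+    To make that concrete, here is the shape of the gating: each entry in the `_tasks` list is wrapped in a Jinja conditional, so if you leave the corresponding question at its `False` default, the task renders to an empty string and nothing is executed. A minimal sketch of the pattern (the full list is in the `copier.yml` diff below):
+
+    ```yaml
+    # sketch: these tasks only render (and run) if you opted in to init_repo
+    _tasks:
+      - "{% if init_repo %} git init {% endif %}"
+      - "{% if init_repo %} git add --all {% endif %}"
+    ```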
The chunks of commands the template can run for you are:
+
+    - `virtual_environment` — Create and activate a virtual environment for the project in your newly templated project directory, install `uv`, compile a `requirements.txt`, and install those dependencies:
+
+      ```bash
+      python3 -m venv <virtual_environment_name>
+      source <virtual_environment_name>/bin/activate
+      python3 -m pip install --upgrade pip
+      python3 -m pip install uv
+      uv pip compile requirements.in -o requirements.txt
+      uv pip install -r requirements.txt
+      ```
+
+    - `move_profile` — Put the contents of the `profiles.yml` file in the correct place in your home directory, then remove the file from your project for security (again, no credentials ever get entered, but in case you do edit it and put credentials in, I don't want you to accidentally commit it)
+
+      ```bash
+      mkdir -p ~/.dbt && cat profiles.yml >> ~/.dbt/profiles.yml
+      rm profiles.yml
+      ```
+
+    - `init_repo` — Initialize a new git repo in your project and make an initial commit, then install the pre-commit hooks in your project for future commits (we need a `.git` directory to install the pre-commit hooks, so we have to do this after the initial commit)
+
+      ```bash
+      git init
+      git add --all
+      git commit -m "Initial commit."
+      source <virtual_environment_name>/bin/activate && pre-commit install
+      ```
+
+  - If you feel more comfortable with it, you can just clone or fork the repo, delete the tasks section, skip the `--trust` flag, and run the commands manually after the project is created — it will accomplish the same thing, just with a bit more manual work — in that case the command to run copier would be `copier copy <path-to-local-clone> <path-to-project-directory>`. As mentioned though, the tasks all default to `False` so you can opt out of any or all of them even with the `--trust` flag.
+
+3. Follow the prompts to configure your project. Depending on your answers to certain prompts, different prompts may appear or disappear (e.g. if you choose `bigquery` as your `data_warehouse` you'll get a different set of questions to configure the `profiles.yml`; if you leave `virtual_environment` as `False`, we won't prompt you for a virtual environment name).

 4. Your project is now ready to use! `cd` into the newly created project and run:

-   ```shell
+   ```bash
    dbt deps
    dbt debug
    ```

@@ -101,12 +128,12 @@ Before embarking on this quick journey: if your data platform has a CLI tool tha

 5. Start building your dbt project!

-- Consider using the included `dbt-codegen` package to build some initial sources and staging models from your data warehouse metadata.
-- Once you've got some models built, try running `dbt build` to run and test your models.
+   - Consider using the included `dbt-codegen` package to build some initial sources and staging models from your data warehouse metadata (see the example just after these steps).
+   - Once you've got some models built, try running `dbt build` to run and test your models.

 6. Push it!

-- The setup process will have initialized a git repository for you and made an initial commit of the starting state, so you can go right ahead and push your new project to your favorite git hosting service. It will run the pre-commit hooks automatically on commit, so you don't have to worry about linting or formatting your code before you commit it.
+   - The setup process will have initialized a git repository for you and made an initial commit of the starting state, so you can go right ahead and push your new project to your favorite git hosting service. It will run the pre-commit hooks automatically on commit, so you don't have to worry about linting or formatting your code before you commit it.
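+
+To make the `dbt-codegen` suggestion in step 5 concrete: the package exposes macros you call with `dbt run-operation`, which print generated YAML or SQL you can paste into your project. A quick sketch (the schema, source, and table names are placeholders; swap in your own warehouse objects):
+
+```bash
+# print a sources YAML block for an existing schema in your warehouse
+dbt run-operation generate_source --args '{"schema_name": "raw_data"}'
+
+# print a starter staging model for one of those source tables
+dbt run-operation generate_base_model --args '{"source_name": "raw_data", "table_name": "customers"}'
+```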
 ## Tips

@@ -116,40 +143,69 @@ Before embarking on this quick journey: if your data platform has a CLI tool tha

 - If you decide you like `uv`, it may be a good idea to install it globally so you can use it for initializing new projects and other things. You can find the installation instructions in the [ `uv` documentation ](https://github.com/astral-sh/uv).

-- Always make sure you're installing Python packages in a virtual environment to avoid dependency conflicts(or using `pipx` if it really is supposed to be global). Not to be a broken record, but _yet another_ cool thing `uv` does is always install your packages into a virtual environment by default, even if it's not activated (unlike `pip`), and it will prompt you to create one if one doesn't exist yet. This comes in _super_ handy to save you from accidentally installing a project's dependencies globally.
+- Always make sure you're installing Python packages in a virtual environment to avoid dependency conflicts (or using `pipx` if it really is supposed to be global). Not to be a broken record, but _yet another_ cool thing `uv` does is always install your packages into a virtual environment by default, even if it's not activated (unlike `pip`), and it will prompt you to create one if one doesn't exist yet. This comes in _super_ handy to save you from accidentally installing a project's dependencies globally.

 - If you need to update any dependencies you can change the version(s) in the `requirements.in` file and run `uv pip compile requirements.in -o requirements.txt` to compile an updated `requirements.txt` file. Then run `uv pip install -r requirements.txt` to install the updated dependencies.

-- If you don't want use a cloud warehouse, I recommend using `duckdb` as your local warehouse. It's a really neat database that's super fast on medium-sized data and has one of the best SQL syntaxes in the game right now. It can run completely locally, but you can also easily wire it up to cloud storage like S3 or GCS, or even a cloud warehouse SaaS called [MotherDuck](https://motherduck.com/).
-
-- Typing long commands is a bummer, if you plan on doing a lot of Python and dbt development, I highly recommend setting up _*aliases*_ for common commands in your shell configuration (`~/.bashrc`, `~/.zshrc`, etc.). For example, you could add the following to your shell configuration to make running dbt and python commands easier (just make sure they don't conflict with existing aliases or commands, customize to your liking!):
-  ```shell
-  export EDITOR=<your favorite editor>
-  # dbt alias suggestions
-  alias dbtp="$EDITOR ~/.dbt/profiles.yml"
-  alias db="dbt build"
-  alias dbs="dbt build -s"
-  alias dt="dbt test"
-  alias dts="dbt test -s"
-  alias dr="dbt run"
-  alias drs="dbt run -s"
-  alias dp="dbt parse"
-  alias dmv="dbt parse && mf validate-configs"
-  # Python alias suggestions
-  alias python="python3"
-  alias venv="uv venv .venv"
-  alias va="source .venv/bin/activate"
-  alias venva="venv && va"
-  alias pi="uv pip"
-  alias pir="uv pip install -r"
-  alias pirr="uv pip install -r requirements.txt"
-  alias pc="uv pip compile requirements.in -o requirements.txt"
-  alias piup="uv pip install --upgrade pip"
-  alias vpi="venva && piup && pirr"
-  alias vpci="venva && piup && pc && pirr"
-  # Go-to your project, activate the virtual environment, and open it in your text editor
-  alias <your-project>="cd <path/to/your/project> && venva && $EDITOR ."
-  ```
-  - Notice we can use previously defined aliases in new aliases.
For example, `vpci` uses `venva` and `pirr` to update the project's dependencies and install them. +- If you don't want to use a cloud warehouse, I recommend using `duckdb` as your local warehouse. It's a really neat database that's super fast on medium-sized data and has one of the best SQL syntaxes in the game right now. It can run completely locally, but you can also easily wire it up to cloud storage like S3 or GCS, or even a cloud warehouse SaaS called [MotherDuck](https://motherduck.com/). + +### Learning resources + +If you're new to dbt, SQL, or Jinja, I highly recommend the following learning resources: + +- [dbt Learn](https://learn.getdbt.com/) - dbt Labs' official learning platform, with a bunch of great free courses to get you started +- [Mode's SQL Tutorial](https://mode.com/sql-tutorial) - IMO the best free resource to learn SQL from the ground up +- [Jinja's official documentation](https://jinja.palletsprojects.com/en/3.0.x/templates/) - specifically the Template Designer Docs in the link. Jinja is a really powerful templating language that dbt and many other projects use (including `copier` i.e. this repo!). Once you get the basics of dbt and SQL down, learning Jinja will take your dbt projects to the next level. +- [dbt Labs' **How we structure our dbt projects** guide](https://docs.getdbt.com/best-practices/how-we-structure/1-guide-overview) - the standard resource covering the best way to structure your dbt projects and why. This template follows these guidelines.[^2] + +If you're looking to deploy the dbt project you create with this template, the best way is with [dbt Cloud](https://cloud.getdbt.com/).[^2] It includes advanced orchestration, a cloud-based IDE, an interactive visual Explorer with column-level lineage, flexible alerts, [auto-deferral](https://docs.getdbt.com/blog/defer-to-prod), version control, and a lot more. It's the best way to get a dbt project into production quickly, easily, and reliably — and to get multiple people with varied knowledge working on the same project efficiently. If you're interested in trying it out, you can [sign up for a free trial](https://getdbt.com/signup) and get started in minutes. + +### Improving the command line experience + +There are some really useful command line tools for folks developing dbt projects locally (meaning they're using SQL, Jinja, Python, and the command line a lot). 
Here are a few I recommend:
+
+- [`zoxide`](https://github.com/ajeetdsouza/zoxide) - a faster, easier-to-use, and more flexible replacement for the `cd` command that learns your habits and saves you a lot of typing with a combination of fuzzy search and frecency (frequency + recency) sorting of your directory changing history
+- [`rip`](https://github.com/nivekuil/rip) - a safer and easier-to-use replacement for the `rm` command that moves files to the trash instead of deleting them and lets you recover them if you make a mistake
+- [`fzf`](https://github.com/junegunn/fzf) - a fuzzy finder that makes it easy to search through your command history, files, and directories super fast
+- [`bat`](https://github.com/sharkdp/bat) - a `cat` replacement that adds syntax highlighting and line numbers; alias it to `cat` and never look back
+- [`eza`](https://github.com/eza-community/eza) - a faster and more powerful replacement for the `ls` command
+- [`fd`](https://github.com/sharkdp/fd) - a faster and easier-to-use replacement for the `find` command
+- [`ripgrep`](https://github.com/BurntSushi/ripgrep) - a much faster and more powerful replacement for the `grep` command
+- [`atuin`](https://github.com/atuinsh/atuin) - a more powerful and magical shell history tool, with fuzzy search and a lot of other cool features
+- [`starship`](https://starship.rs/) - a really cool and fast shell prompt that's highly customizable (using TOML so it's very easy and readable) and has a lot of nice features; the default settings are great if you don't want to bother customizing it
+- [`kitty`](https://sw.kovidgoyal.net/kitty/) - a fast, feature-rich (great font, image, and mouse support, for example), and highly customizable terminal emulator that's a joy to use
+
+Typing long commands is a bummer. If you plan on doing a lot of Python and dbt development, I highly recommend setting up _**aliases**_ for common commands in your shell configuration (`~/.bashrc`, `~/.zshrc`, etc.). For example, you could add the following to your shell configuration to make running dbt and python commands easier (just make sure they don't conflict with existing aliases or commands; customize to your liking!):
+
+```bash
+export EDITOR=<your favorite editor>
+# dbt alias suggestions
+alias dbtp="$EDITOR ~/.dbt/profiles.yml"
+alias db="dbt build"
+alias dbs="dbt build -s"
+alias dt="dbt test"
+alias dts="dbt test -s"
+alias dr="dbt run"
+alias drs="dbt run -s"
+alias dp="dbt parse"
+alias dmv="dbt parse && mf validate-configs"
+# Python alias suggestions
+alias python="python3"
+alias venv="uv venv .venv"
+alias va="source .venv/bin/activate"
+alias venva="venv && va"
+alias pi="uv pip"
+alias pir="uv pip install -r"
+alias pirr="uv pip install -r requirements.txt"
+alias pc="uv pip compile requirements.in -o requirements.txt"
+alias piup="uv pip install --upgrade pip"
+alias vpi="venva && piup && pirr"
+alias vpci="venva && piup && pc && pirr"
+# Go to your project, activate the virtual environment, and open it in your text editor
+alias <your-project>="cd <path/to/your/project> && venva && $EDITOR ."
+```
+
+- Notice we can use previously defined aliases in new aliases. For example, `vpci` uses `venva` and `pirr` to update the project's dependencies and install them.

 [^1]: I've only selected the most secure and simple authentication method for each warehouse for the time being. You can manually configure more complex and specific authentication methods like password-based authentication, SSO, JSON keys, etc. in the `~/.dbt/profiles.yml` file after the setup process is complete.
Wherever possible though, I've opted for _simplicity_ and _security_ — for example the configuration for BigQuery requires that you have installed the `gcloud` CLI and authenticated using OAuth through that. The Redshift authentication method is also the most secure and simple method available, using IAM roles and the `awscli`'s `~/.aws/config` credentials to authenticate. I highly recommend sticking with these methods and using these tools if it's an option. +[^2]: I work for dbt Labs, I'm very biased! 🤷🏻‍♀️ Also I wrote the **How we structure our dbt projects** guide, so y'know, maybe a bit biased there too 😹. diff --git a/copier.yml b/copier.yml index 0a44a15..74d0b6d 100644 --- a/copier.yml +++ b/copier.yml @@ -6,13 +6,13 @@ project_name: project_slug: type: str - default: "{{ project_name | lower | replace(' ', '_') | replace('-', '_') }}" + default: "{{ project_name | lower | replace(' ', '_') | replace('-', '_') | replace('.', '') }}" help: The slugified name of your project. # Warehouse profile connection configs profile_name: type: str - default: "{{ project_name | lower | replace(' ', '_') | replace('-', '_') }}" + default: "{{ project_slug }}" help: | The name of your default dev profile, this is the set of configs for connecting to your warehouse for development. @@ -122,11 +122,33 @@ group_by_and_order_by_style: - implicit - explicit +# Task options +virtual_environment: + type: bool + default: False + help: | + Do you want copier to initialize and activate a virtual environment, + and install dependencies? + +init_repo: + type: bool + default: False + help: | + Do you want copier to initialize and make a first commit to a fresh git repo? + +move_profile: + type: bool + default: False + help: | + Do you want copier to move the contents of your generated `profiles.yml` file + to the appropriate place in your home directory (`~/.dbt/profiles.yml`)? + # Python configs virtual_environment_name: type: str default: .venv help: What do you want your virtual environment to be called? 
+ when: "{{ virtual_environment }}" # Files to exclude from template _exclude: @@ -135,10 +157,17 @@ _exclude: - "~*" - "*.py[co]" - "__pycache__" + - ".pytest_cache" - ".git" + - ".github" - ".DS_STORE" - "README.md" - ".venv" + - ".env" + - "venv" + - "env" + - "dev-requirements.txt" + - "dev-requirements.in" # Pre copy message _message_before_copy: | @@ -171,17 +200,16 @@ _message_after_copy: | # Tasks _tasks: - - "python3 -m venv {{ virtual_environment_name }}" - - "source {{ virtual_environment_name }}/bin/activate" - - "python3 -m pip install uv" - - "source {{ virtual_environment_name }}/bin/activate" - - "uv pip compile requirements.in -o requirements.txt" - - "uv pip install --upgrade pip" - - "uv pip install -r requirements.txt" - - "mkdir -p ~/.dbt && cat profiles.yml >> ~/.dbt/profiles.yml" - - "rm profiles.yml" - - "git init" - - "git add --all" - - "git commit -m 'Initial commit.'" - - "source {{ virtual_environment_name }}/bin/activate && pre-commit install" - + - "{% if virtual_environment %} python3 -m venv {{ virtual_environment_name }} {% endif %}" + - "{% if virtual_environment %} source {{ virtual_environment_name }}/bin/activate {% endif %}" + - "{% if virtual_environment %} python3 -m pip install --upgrade pip {% endif %}" + - "{% if virtual_environment %} python3 -m pip install uv {% endif %}" + - "{% if virtual_environment %} source {{ virtual_environment_name }}/bin/activate {% endif %}" + - "{% if virtual_environment %} uv pip compile requirements.in -o requirements.txt {% endif %}" + - "{% if virtual_environment %} uv pip install -r requirements.txt {% endif %}" + - "{% if move_profile %} mkdir -p ~/.dbt && cat profiles.yml >> ~/.dbt/profiles.yml {% endif %}" + - "{% if move_profile %} rm profiles.yml {% endif %}" + - "{% if init_repo %} git init {% endif %}" + - "{% if init_repo %} git add --all {% endif %}" + - "{% if init_repo %} git commit -m 'Initial commit.' 
{% endif %}" + - "{% if virtual_environment and init_repo %} source {{ virtual_environment_name }}/bin/activate && pre-commit install {% endif %}" diff --git a/dev-requirements.in b/dev-requirements.in new file mode 100644 index 0000000..6dd8e25 --- /dev/null +++ b/dev-requirements.in @@ -0,0 +1,3 @@ +pytest +copier +deepdiff diff --git a/dev-requirements.txt b/dev-requirements.txt new file mode 100644 index 0000000..f303d66 --- /dev/null +++ b/dev-requirements.txt @@ -0,0 +1,62 @@ +# This file was autogenerated by uv via the following command: +# uv pip compile template-integration-tests/dev-requirements.in -o template-integration-tests/dev-requirements.txt +annotated-types==0.6.0 + # via pydantic +colorama==0.4.6 + # via copier +copier==9.1.1 +decorator==5.1.1 + # via copier +dunamai==1.19.2 + # via copier +exceptiongroup==1.2.0 + # via pytest +funcy==2.0 + # via copier +iniconfig==2.0.0 + # via pytest +jinja2==3.1.3 + # via + # copier + # jinja2-ansible-filters +jinja2-ansible-filters==1.3.2 + # via copier +markupsafe==2.1.5 + # via jinja2 +packaging==23.2 + # via + # copier + # dunamai + # pytest +pathspec==0.12.1 + # via copier +pluggy==1.4.0 + # via pytest +plumbum==1.8.2 + # via copier +prompt-toolkit==3.0.36 + # via questionary +pydantic==2.6.3 + # via copier +pydantic-core==2.16.3 + # via pydantic +pygments==2.17.2 + # via copier +pytest==8.1.0 +pyyaml==6.0.1 + # via + # copier + # jinja2-ansible-filters + # pyyaml-include +pyyaml-include==1.3.2 + # via copier +questionary==2.0.1 + # via copier +tomli==2.0.1 + # via pytest +typing-extensions==4.10.0 + # via + # pydantic + # pydantic-core +wcwidth==0.2.13 + # via prompt-toolkit diff --git a/profiles.yml.jinja b/profiles.yml.jinja index 31ea38c..38612bf 100644 --- a/profiles.yml.jinja +++ b/profiles.yml.jinja @@ -3,71 +3,55 @@ target: dev outputs: dev: - type: {{ data_warehouse }} - account: {{ account_env_var }} - user: {{ username }} + type: {{ data_warehouse | lower }} + account: {{ account_env_var | lower }} + user: {{ username | lower }} authenticator: externalbrowser - database: {{ database }} - warehouse: {{ warehouse }} - schema: {{ schema }} - threads: {{ thread_count }} + database: {{ database | lower}} + warehouse: {{ warehouse | lower }} + schema: {{ schema | lower }} + threads: {{ thread_count}} {%- endif -%} -{%- if data_warehouse == "bigquery" -%} +{% if data_warehouse == "bigquery" %} {{ profile_name }}: target: dev outputs: dev: type: {{ data_warehouse }} - project: {{ database }} - dataset: {{ schema }} + project: {{ database | lower }} + dataset: {{ schema | lower }} method: oauth threads: {{ thread_count }} {%- endif -%} -{%- if data_warehouse == "redshift" -%} +{% if data_warehouse == "redshift" %} {{ profile_name }}: target: dev outputs: dev: - type: {{ data_warehouse }} - account: {{ account_env_var }} - user: {{ username }} - database: {{ database }} - schema: {{ schema }} - threads: {{ thread_count }} + # coming soon {%- endif -%} -{%- if data_warehouse == "duckdb" -%} +{% if data_warehouse == "duckdb" %} {{ profile_name }}: target: dev outputs: dev: type: {{ data_warehouse }} - path: {{ duckdb_file_path }} + path: {{ duckdb_file_path | lower }} + database: {{ database | lower }} + schema: {{ schema | lower }} + threads: {{ thread_count }} {%- endif -%} -{%- if data_warehouse == "postgres" -%} +{% if data_warehouse == "postgres" %} {{ profile_name }}: target: dev outputs: dev: - type: {{ data_warehouse }} - account: {{ account_env_var }} - user: {{ username }} - authenticator: externalbrowser - database: {{ 
database }}
-      warehouse: {{ warehouse }}
-      schema: {{ schema }}
-      threads: {{ thread_count }}
+      # coming soon
 {%- endif -%}
-{%- if data_warehouse == "databricks" -%}
+{% if data_warehouse == "databricks" %}
 {{ profile_name }}:
   target: dev
   outputs:
     dev:
-      type: {{ data_warehouse }}
-      account: {{ account_env_var }}
-      user: {{ username }}
-      authenticator: externalbrowser
-      database: {{ database }}
-      warehouse: {{ warehouse }}
-      schema: {{ schema }}
-      threads: {{ thread_count }}
+      # coming soon
 {%- endif -%}
diff --git a/template-integration-tests/test-expectations/bigquery_profile.yml b/template-integration-tests/test-expectations/bigquery_profile.yml
new file mode 100644
index 0000000..90556f9
--- /dev/null
+++ b/template-integration-tests/test-expectations/bigquery_profile.yml
@@ -0,0 +1,10 @@
+
+legolas_corp:
+  target: dev
+  outputs:
+    dev:
+      type: bigquery
+      project: mirkwood
+      dataset: archers
+      method: oauth
+      threads: 8
diff --git a/template-integration-tests/test-expectations/duckdb_profile.yml b/template-integration-tests/test-expectations/duckdb_profile.yml
new file mode 100644
index 0000000..ef2d372
--- /dev/null
+++ b/template-integration-tests/test-expectations/duckdb_profile.yml
@@ -0,0 +1,10 @@
+
+lothlorien_enterprises:
+  target: dev
+  outputs:
+    dev:
+      type: duckdb
+      path: ./lothlorien.db
+      database: mallorn
+      schema: flets
+      threads: 8
diff --git a/template-integration-tests/test-expectations/snowflake_profile.yml b/template-integration-tests/test-expectations/snowflake_profile.yml
new file mode 100644
index 0000000..ad489d1
--- /dev/null
+++ b/template-integration-tests/test-expectations/snowflake_profile.yml
@@ -0,0 +1,13 @@
+
+aragorn_inc:
+  target: dev
+  outputs:
+    dev:
+      type: snowflake
+      account:
+      user: strider
+      authenticator: externalbrowser
+      database: gondor
+      warehouse: narsil
+      schema: rangers
+      threads: 8
diff --git a/template-integration-tests/test_profile_output_is_correct.py b/template-integration-tests/test_profile_output_is_correct.py
new file mode 100644
index 0000000..1c43fbd
--- /dev/null
+++ b/template-integration-tests/test_profile_output_is_correct.py
@@ -0,0 +1,70 @@
+import os
+import shutil
+from pathlib import Path
+from typing import Dict
+
+import copier
+import yaml
+from deepdiff import DeepDiff
+
+PROJECT_ROOT = Path(__file__).parent.parent
+TEST_EXPECT = PROJECT_ROOT / "template-integration-tests" / "test-expectations"
+TEST_BUILD_DIR = PROJECT_ROOT / "template-integration-tests" / "test-build"
+
+
+warehouse_answers: Dict[str, Dict[str, str]] = {
+    "duckdb": {
+        "project_name": "Lothlorien Enterprises",
+        "data_warehouse": "duckdb",
+        "username": "galadriel",
+        "database": "mallorn",
+        "schema": "flets",
+        "duckdb_file_path": "./lothlorien.db",
+    },
+    "snowflake": {
+        "project_name": "Aragorn Inc.",
+        "account_id": "minas_tirith.us-east-1",
+        "data_warehouse": "snowflake",
+        "username": "Strider",
+        "warehouse": "Narsil",
+        "role": "King",
+        "database": "gondor",
+        "schema": "rangers",
+    },
+    "bigquery": {
+        "project_name": "Legolas Corp",
+        "data_warehouse": "bigquery",
+        "username": "legolas",
+        "database": "mirkwood",
+        "schema": "archers",
+    },
+}
+
+
+def _check_profiles(warehouse):
+    data = warehouse_answers[warehouse]
+
+    copier.run_copy(
+        str(PROJECT_ROOT),
+        str(TEST_BUILD_DIR / warehouse),
+        data=data,
+        defaults=True,
+        unsafe=True,
+        vcs_ref="HEAD",
+    )
+
+    with open(TEST_EXPECT / f"{warehouse}_profile.yml", "r") as f:
+        expected_output = yaml.safe_load(f)
+    with open(TEST_BUILD_DIR / warehouse / "profiles.yml", "r") as f:
+ actual_output = yaml.safe_load(f) + + diff = DeepDiff(expected_output, actual_output) + assert diff == {}, f"Differences: {diff}" + + +def test_profile_output_is_correct(): + if os.path.exists(TEST_BUILD_DIR): + shutil.rmtree(TEST_BUILD_DIR) + + for warehouse in warehouse_answers: + _check_profiles(warehouse)
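
A note for anyone running the new test suite locally: the CI workflow above translates directly to the shell. From the repository root, something like the following should reproduce the CI run (inside an activated virtual environment you would drop the `--system` flag):

```bash
# mirror the CI steps: install uv, compile and install dev requirements, run the tests
python -m pip install uv
uv pip compile dev-requirements.in -o dev-requirements.txt
uv pip install -r dev-requirements.txt --system
pytest template-integration-tests
```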