diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md index 57838dae..52bcc14c 100644 --- a/.github/CONTRIBUTING.md +++ b/.github/CONTRIBUTING.md @@ -67,14 +67,14 @@ Thanks for your interest in contributing code to schemachange! 2. Activate your virtual environment. The following table is a replication of [this](https://docs.python.org/3/library/venv.html#how-venvs-work) table: - | Platform | Shell | Command | - |---------- |------------ |--------------------------------------- | - | POSIX | bash/zsh | `$ source /bin/activate` | - | POSIX | fish | `$ source /bin/activate.fish` | - | POSIX | csh/tcsh | `$ source /bin/activate.csh` | - | POSIX | PowerShell | `$ /bin/Activate.ps1` | - | Windows | cmd.exe | `C:\> \Scripts\activate.bat` | - | Windows | PowerShell | `PS C:\> \Scripts\Activate.ps1` | + | Platform | Shell | Command | + |----------|------------|---------------------------------------| + | POSIX | bash/zsh | `$ source /bin/activate` | + | POSIX | fish | `$ source /bin/activate.fish` | + | POSIX | csh/tcsh | `$ source /bin/activate.csh` | + | POSIX | PowerShell | `$ /bin/Activate.ps1` | + | Windows | cmd.exe | `C:\> \Scripts\activate.bat` | + | Windows | PowerShell | `PS C:\> \Scripts\Activate.ps1` | 3. With your virtual environment activated, upgrade pip ```bash @@ -86,7 +86,6 @@ Thanks for your interest in contributing code to schemachange! pip install -e .[dev] ``` - 3. Develop your contribution + Create a branch for the features you want to work on. Since the branch name will appear in the merge message, use a sensible name such as 'update-build-library-dependencies': @@ -114,5 +113,5 @@ Thanks for your interest in contributing code to schemachange! + Go to GitHub. The new branch will show up with a green [Pull Request](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/proposing-changes-to-your-work-with-pull-requests/about-pull-requests#initiating-the-pull-request) - button. Make sure the title and message are clear, concise and self explanatory. Then click the button to submit + button. Make sure the title and message are clear, concise and self-explanatory. Then click the button to submit it. diff --git a/README.md b/README.md index f5196d3a..491bd311 100644 --- a/README.md +++ b/README.md @@ -1,62 +1,72 @@ # schemachange + schemachange *Looking for snowchange? You've found the right spot. snowchange has been renamed to schemachange.* [![pytest](https://github.com/Snowflake-Labs/schemachange/actions/workflows/master-pytest.yml/badge.svg)](https://github.com/Snowflake-Labs/schemachange/actions/workflows/master-pytest.yml) [![PyPI](https://img.shields.io/pypi/v/schemachange.svg)](https://pypi.org/project/schemachange) + ## Overview -schemachange is a simple python based tool to manage all of your [Snowflake](https://www.snowflake.com/) objects. It follows an Imperative-style approach to Database Change Management (DCM) and was inspired by the [Flyway database migration tool](https://flywaydb.org). When combined with a version control system and a CI/CD tool, database changes can be approved and deployed through a pipeline using modern software delivery practices. As such schemachange plays a critical role in enabling Database (or Data) DevOps. +schemachange is a simple python based tool to manage all of your [Snowflake](https://www.snowflake.com/) objects. It +follows an Imperative-style approach to Database Change Management (DCM) and was inspired by +the [Flyway database migration tool](https://flywaydb.org). 
When combined with a version control system and a CI/CD +tool, database changes can be approved and deployed through a pipeline using modern software delivery practices. As such +schemachange plays a critical role in enabling Database (or Data) DevOps. -DCM tools (also known as Database Migration, Schema Change Management, or Schema Migration tools) follow one of two approaches: Declarative or Imperative. For a background on Database DevOps, including a discussion on the differences between the Declarative and Imperative approaches, please read the [Embracing Agile Software Delivery and DevOps with Snowflake](https://www.snowflake.com/blog/embracing-agile-software-delivery-and-devops-with-snowflake/) blog post. +DCM tools (also known as Database Migration, Schema Change Management, or Schema Migration tools) follow one of two +approaches: Declarative or Imperative. For a background on Database DevOps, including a discussion on the differences +between the Declarative and Imperative approaches, please read +the [Embracing Agile Software Delivery and DevOps with Snowflake](https://www.snowflake.com/blog/embracing-agile-software-delivery-and-devops-with-snowflake/) +blog post. For the complete list of changes made to schemachange check out the [CHANGELOG](CHANGELOG.md). -**Please note** that schemachange is a community-developed tool, not an official Snowflake offering. It comes with no support or warranty. +**Please note** that schemachange is a community-developed tool, not an official Snowflake offering. It comes with no +support or warranty. ## Table of Contents 1. [Overview](#overview) 1. [Project Structure](#project-structure) - 1. [Folder Structure](#folder-structure) + 1. [Folder Structure](#folder-structure) 1. [Change Scripts](#change-scripts) - 1. [Versioned Script Naming](#versioned-script-naming) - 1. [Repeatable Script Naming](#repeatable-script-naming) - 1. [Always Script Naming](#always-script-naming) - 1. [Script Requirements](#script-requirements) - 1. [Using Variables in Scripts](#using-variables-in-scripts) - 1. [Secrets filtering](#secrets-filtering) - 1. [Jinja templating engine](#jinja-templating-engine) - 1. [Gotchas](#gotchas) + 1. [Versioned Script Naming](#versioned-script-naming) + 1. [Repeatable Script Naming](#repeatable-script-naming) + 1. [Always Script Naming](#always-script-naming) + 1. [Script Requirements](#script-requirements) + 1. [Using Variables in Scripts](#using-variables-in-scripts) + 1. [Secrets filtering](#secrets-filtering) + 1. [Jinja templating engine](#jinja-templating-engine) + 1. [Gotchas](#gotchas) 1. [Change History Table](#change-history-table) 1. [Authentication](#authentication) - 1. [Password Authentication](#password-authentication) - 1. [Private Key Authentication](#private-key-authentication) - 1. [Oauth Authentication](#oauth-authentication) - 1. [External Browser Authentication](#external-browser-authentication) - 1. [Okta Authentication](#okta-authentication) + 1. [Password Authentication](#password-authentication) + 1. [Private Key Authentication](#private-key-authentication) + 1. [Oauth Authentication](#oauth-authentication) + 1. [External Browser Authentication](#external-browser-authentication) + 1. [Okta Authentication](#okta-authentication) 1. [Configuration](#configuration) - 1. [YAML Config File](#yaml-config-file) - 1. [Yaml Jinja support](#yaml-jinja-support) - 1. [Command Line Arguments](#command-line-arguments) + 1. [YAML Config File](#yaml-config-file) + 1. [Yaml Jinja support](#yaml-jinja-support) + 1. 
[Command Line Arguments](#command-line-arguments) 1. [Running schemachange](#running-schemachange) - 1. [Prerequisites](#prerequisites) - 1. [Running The Script](#running-the-script) -1. [Getting Started with schemachange](#getting-started-with-schemachange) + 1. [Prerequisites](#prerequisites) + 1. [Running The Script](#running-the-script) 1. [Integrating With DevOps](#integrating-with-devops) - 1. [Sample DevOps Process Flow](#sample-devops-process-flow) - 1. [Using in a CI/CD Pipeline](#using-in-a-cicd-pipeline) + 1. [Sample DevOps Process Flow](#sample-devops-process-flow) + 1. [Using in a CI/CD Pipeline](#using-in-a-cicd-pipeline) 1. [Maintainers](#maintainers) 1. [Third Party Packages](#third-party-packages) 1. [Legal](#legal) - ## Project Structure ### Folder Structure schemachange expects a directory structure like the following to exist: + ``` (project_root) | @@ -71,13 +81,19 @@ schemachange expects a directory structure like the following to exist: |-- R__fn_sort_ascii.sql ``` -The schemachange folder structure is very flexible. The `project_root` folder is specified with the `-f` or `--root-folder` argument. schemachange only pays attention to the filenames, not the paths. Therefore, under the `project_root` folder you are free to arrange the change scripts any way you see fit. You can have as many subfolders (and nested subfolders) as you would like. +The schemachange folder structure is very flexible. The `project_root` folder is specified with the `-f` +or `--root-folder` argument. schemachange only pays attention to the filenames, not the paths. Therefore, under +the `project_root` folder you are free to arrange the change scripts any way you see fit. You can have as many +subfolders (and nested subfolders) as you would like. ## Change Scripts ### Versioned Script Naming -Versioned change scripts follow a similar naming convention to that used by [Flyway Versioned Migrations](https://flywaydb.org/documentation/migrations#versioned-migrations). The script name must follow this pattern (image taken from [Flyway docs](https://flywaydb.org/documentation/migrations#versioned-migrations)): +Versioned change scripts follow a similar naming convention to that used +by [Flyway Versioned Migrations](https://flywaydb.org/documentation/migrations#versioned-migrations). The script name +must follow this pattern (image taken +from [Flyway docs](https://flywaydb.org/documentation/migrations#versioned-migrations)): Flyway naming conventions @@ -86,21 +102,29 @@ With the following rules for each part of the filename: * **Prefix**: The letter 'V' for versioned change * **Version**: A unique version number with dots or underscores separating as many number parts as you like * **Separator**: __ (two underscores) -* **Description**: An arbitrary description with words separated by underscores or spaces (can not include two underscores) +* **Description**: An arbitrary description with words separated by underscores or spaces (can not include two + underscores) * **Suffix**: .sql or .sql.jinja -For example, a script name that follows this convention is: `V1.1.1__first_change.sql`. As with Flyway, the unique version string is very flexible. You just need to be consistent and always use the same convention, like 3 sets of numbers separated by periods. Here are a few valid version strings: +For example, a script name that follows this convention is: `V1.1.1__first_change.sql`. As with Flyway, the unique +version string is very flexible. 
You just need to be consistent and always use the same convention, like 3 sets of
+numbers separated by periods. Here are a few valid version strings:

* 1.1
* 1_1
* 1.2.3
* 1_2_3

-Every script within a database folder must have a unique version number. schemachange will check for duplicate version numbers and throw an error if it finds any. This helps to ensure that developers who are working in parallel don't accidently (re-)use the same version number.
+Every script within a database folder must have a unique version number. schemachange will check for duplicate version
+numbers and throw an error if it finds any. This helps to ensure that developers who are working in parallel don't
+accidentally (re-)use the same version number.

### Repeatable Script Naming

-Repeatable change scripts follow a similar naming convention to that used by [Flyway Versioned Migrations](https://flywaydb.org/documentation/concepts/migrations.html#repeatable-migrations). The script name must follow this pattern (image taken from [Flyway docs](https://flywaydb.org/documentation/concepts/migrations.html#repeatable-migrations):
+Repeatable change scripts follow a similar naming convention to that used
+by [Flyway Versioned Migrations](https://flywaydb.org/documentation/concepts/migrations.html#repeatable-migrations). The
+script name must follow this pattern (image taken
+from [Flyway docs](https://flywaydb.org/documentation/concepts/migrations.html#repeatable-migrations)):

Flyway naming conventions

@@ -111,13 +135,16 @@ e.g:
* R__fn_sort_ascii.sql

All repeatable change scripts are applied each time the utility is run, if there is a change in the file.
-Repeatable scripts could be used for maintaining code that always needs to be applied in its entirety. e.g. stores procedures, functions and view definitions etc.
+Repeatable scripts could be used for maintaining code that always needs to be applied in its entirety, e.g. stored
+procedures, functions and view definitions.

-Just like Flyway, within a single migration run, repeatable scripts are always applied after all pending versioned scripts have been executed. Repeatable scripts are applied in alphabetical order of their description.
+Just like Flyway, within a single migration run, repeatable scripts are always applied after all pending versioned
+scripts have been executed. Repeatable scripts are applied in alphabetical order of their description.

### Always Script Naming

-Always change scripts are executed with every run of schemachange. This is an addition to the implementation of [Flyway Versioned Migrations](https://flywaydb.org/documentation/concepts/migrations.html#repeatable-migrations).
+Always change scripts are executed with every run of schemachange. This is an addition to the implementation
+of [Flyway Versioned Migrations](https://flywaydb.org/documentation/concepts/migrations.html#repeatable-migrations).

The script name must follow this pattern:

`A__Some_description.sql`

@@ -131,21 +158,37 @@ This type of change script is useful for an environment set up after cloning. Al

### Script Requirements

-schemachange is designed to be very lightweight and not impose too many limitations. Each change script can have any number of SQL statements within it and must supply the necessary context, like database and schema names. The context can be supplied by using an explicit `USE <database_name>` command or by naming all objects with a three-part name (`<database_name>.<schema_name>.<object_name>`). schemachange will simply run the contents of each script against the target Snowflake account, in the correct order.
+schemachange is designed to be very lightweight and not impose too many limitations. Each change script can have any
+number of SQL statements within it and must supply the necessary context, like database and schema names. The context
+can be supplied by using an explicit `USE <database_name>` command or by naming all objects with a three-part
+name (`<database_name>.<schema_name>.<object_name>`). schemachange will simply run the contents of each script against
+the target Snowflake account, in the correct order.
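+
+For illustration, a minimal versioned change script might look like the following sketch (the database, schema and
+table names are hypothetical placeholders):
+
+```sql
+-- V1.1.2__create_customers_table.sql
+-- Supply the context explicitly so the script is self-contained.
+USE DATABASE MY_DATABASE;
+USE SCHEMA MY_SCHEMA;
+
+CREATE TABLE IF NOT EXISTS CUSTOMERS
+(
+    ID   NUMBER,
+    NAME VARCHAR
+);
+```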

### Using Variables in Scripts

-schemachange supports the jinja engine for a variable replacement strategy. One important use of variables is to support multiple environments (dev, test, prod) in a single Snowflake account by dynamically changing the database name during deployment. To use a variable in a change script, use this syntax anywhere in the script: `{{ variable1 }}`.
-To pass variables to schemachange, check out the [Configuration](#configuration) section below. You can either use the `--vars` command line parameter or the YAML config file `schemachange-config.yml`. For the command line version you can pass variables like this: `--vars '{"variable1": "value", "variable2": "value2"}'`. This parameter accepts a flat JSON object formatted as a string.
+schemachange supports the jinja engine for a variable replacement strategy. One important use of variables is to support
+multiple environments (dev, test, prod) in a single Snowflake account by dynamically changing the database name during
+deployment. To use a variable in a change script, use this syntax anywhere in the script: `{{ variable1 }}`.
+
+To pass variables to schemachange, check out the [Configuration](#configuration) section below. You can either use
+the `--vars` command line parameter or the YAML config file `schemachange-config.yml`. For the command line version you
+can pass variables like this: `--vars '{"variable1": "value", "variable2": "value2"}'`. This parameter accepts a flat
+JSON object formatted as a string.

> *Nested objects and arrays don't make sense at this point and aren't supported.*

-schemachange will replace any variable placeholders before running your change script code and will throw an error if it finds any variable placeholders that haven't been replaced.
+schemachange will replace any variable placeholders before running your change script code and will throw an error if it
+finds any variable placeholders that haven't been replaced.
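+
+As a sketch, the following script parameterizes the database name; `database_name` here is a hypothetical variable
+supplied at deployment time:
+
+```sql
+-- V1.2.0__create_order_view.sql
+-- Deployed with: schemachange deploy --vars '{"database_name": "MY_DATABASE_DEV"}' ...
+CREATE OR REPLACE VIEW {{ database_name }}.MY_SCHEMA.ORDER_V AS
+SELECT *
+FROM {{ database_name }}.MY_SCHEMA.ORDERS;
+```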

#### Secrets filtering

-While many CI/CD tools already have the capability to filter secrets, it is best that any tool also does not output secrets to the console or logs. Schemachange implements secrets filtering in a number of areas to ensure secrets are not writen to the console or logs. The only exception is the `render` command which will display secrets.
-A secret is just a standard variable that has been tagged as a secret. This is determined using a naming convention and either of the following will tag a variable as a secret:
+While many CI/CD tools already have the capability to filter secrets, it is best that any tool also does not output
+secrets to the console or logs. Schemachange implements secrets filtering in a number of areas to ensure secrets are not
+written to the console or logs. The only exception is the `render` command, which will display secrets.
+
+A secret is just a standard variable that has been tagged as a secret. This is determined using a naming convention;
+either of the following will tag a variable as a secret:
+
1. The variable name has the word `secret` in it.
   ```yaml
   config-version: 1
@@ -168,23 +211,40 @@ A secret is just a standard variable that has been tagged as a secret. This is d
   ```

### Jinja templating engine

-schemachange uses the Jinja templating engine internally and supports: [expressions](https://jinja.palletsprojects.com/en/3.0.x/templates/#expressions), [macros](https://jinja.palletsprojects.com/en/3.0.x/templates/#macros), [includes](https://jinja.palletsprojects.com/en/3.0.x/templates/#include) and [template inheritance](https://jinja.palletsprojects.com/en/3.0.x/templates/#template-inheritance).
-These files can be stored in the root-folder but schemachange also provides a separate modules folder `--modules-folder`. This allows common logic to be stored outside of the main changes scripts. The [demo/citibike_demo_jinja](demo/citibike_demo_jinja) has a simple example that demonstrates this.
+schemachange uses the Jinja templating engine internally and
+supports: [expressions](https://jinja.palletsprojects.com/en/3.0.x/templates/#expressions), [macros](https://jinja.palletsprojects.com/en/3.0.x/templates/#macros), [includes](https://jinja.palletsprojects.com/en/3.0.x/templates/#include)
+and [template inheritance](https://jinja.palletsprojects.com/en/3.0.x/templates/#template-inheritance).
+
+These files can be stored in the root-folder but schemachange also provides a separate modules
+folder `--modules-folder`. This allows common logic to be stored outside of the main change scripts.
+The [demo/citibike_demo_jinja](demo/citibike_demo_jinja) has a simple example that demonstrates this.

-The Jinja autoescaping feature is disabled in schemachange, this feature in Jinja is currently designed for where the output language is HTML/XML. So if you are using schemachange with untrusted inputs you will need to handle this within your change scripts.
+The Jinja autoescaping feature is disabled in schemachange because autoescaping in Jinja is designed for output
+languages such as HTML/XML. If you are using schemachange with untrusted inputs, you will need to handle escaping within
+your change scripts.
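+
+As a small sketch of the modules folder in use, a shared macro file could be pulled into a change script like this
+(the file and macro names are hypothetical; the `modules/` prefix is how schemachange exposes the modules folder):
+
+```jinja
+{%- from 'modules/grants.j2' import grant_select -%}
+CREATE OR REPLACE VIEW MY_SCHEMA.MY_VIEW AS SELECT 1 AS C1;
+{{ grant_select('MY_SCHEMA.MY_VIEW') }}
+```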

### Gotchas

Within change scripts:
+
- [Snowflake Scripting blocks need delimiters](https://docs.snowflake.com/en/developer-guide/snowflake-scripting/running-examples#introduction)
- [The last line can't be a comment](https://github.com/Snowflake-Labs/schemachange/issues/130)

## Change History Table

-schemachange records all applied changes scripts to the change history table. By default, schemachange will attempt to log all activities to the `METADATA.SCHEMACHANGE.CHANGE_HISTORY` table. The name and location of the change history table can be overriden by using the `-c` (or `--change-history-table`) parameter. The value passed to the parameter can have a one, two, or three part name (e.g. "TABLE_NAME", or "SCHEMA_NAME.TABLE_NAME", or "DATABASE_NAME.SCHEMA_NAME.TABLE_NAME"). This can be used to support multiple environments (dev, test, prod) or multiple subject areas within the same Snowflake account. By default, schemachange will not try to create the change history table, and will fail if the table does not exist.
+schemachange records all applied change scripts to the change history table. By default, schemachange will attempt to
+log all activities to the `METADATA.SCHEMACHANGE.CHANGE_HISTORY` table. The name and location of the change history
+table can be overridden by using the `-c` (or `--change-history-table`) parameter. The value passed to the parameter can
+have a one, two, or three part name (e.g. "TABLE_NAME", "SCHEMA_NAME.TABLE_NAME", or
+"DATABASE_NAME.SCHEMA_NAME.TABLE_NAME"). This can be used to support multiple environments (dev, test, prod) or multiple
+subject areas within the same Snowflake account. By default, schemachange will not try to create the change history
+table, and will fail if the table does not exist.

-Additionally, if the `--create-change-history-table` parameter is given, then schemachange will attempt to create the schema and table associated with the change history table. schemachange will not attempt to create the database for the change history table, so that must be created ahead of time, even when using the `--create-change-history-table` parameter.
+Additionally, if the `--create-change-history-table` parameter is given, then schemachange will attempt to create the
+schema and table associated with the change history table. schemachange will not attempt to create the database for the
+change history table, so that must be created ahead of time, even when using the `--create-change-history-table`
+parameter.

The structure of the `CHANGE_HISTORY` table is as follows:

@@ -200,9 +260,12 @@ The structure of the `CHANGE_HISTORY` table is as follows:
| INSTALLED_BY | VARCHAR | SNOWFLAKE_USER |
| INSTALLED_ON | TIMESTAMP_LTZ | 2020-03-17 12:54:33.056 -0700 |

-A new row will be added to this table every time a change script has been applied to the database. schemachange will use this table to identify which changes have been applied to the database and will not apply the same version more than once.
+A new row will be added to this table every time a change script has been applied to the database. schemachange will use
+this table to identify which changes have been applied to the database and will not apply the same version more than
+once.

-Here is the current schema DDL for the change history table (found in the [schemachange/cli.py](schemachange/cli.py) script), in case you choose to create it manually and not use the `--create-change-history-table` parameter:
+Here is the current schema DDL for the change history table (found in the [schemachange/cli.py](schemachange/cli.py)
+script), in case you choose to create it manually and not use the `--create-change-history-table` parameter:

```sql
CREATE TABLE IF NOT EXISTS SCHEMACHANGE.CHANGE_HISTORY
@@ -220,68 +283,106 @@ CREATE TABLE IF NOT EXISTS SCHEMACHANGE.CHANGE_HISTORY
```
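+
+You can inspect what has been deployed with an ordinary query against the change history table; the sketch below
+assumes the default table location:
+
+```sql
+SELECT VERSION, SCRIPT, STATUS, INSTALLED_ON
+FROM METADATA.SCHEMACHANGE.CHANGE_HISTORY
+ORDER BY INSTALLED_ON DESC;
+```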

## Authentication

-Schemachange supports snowflake's default authenticator, External Oauth, Browswer based SSO and Programmatic SSO options supported by the [Snowflake Python Connector](https://docs.snowflake.com/en/user-guide/python-connector-example.html#connecting-to-snowflake). Set the environment variable `SNOWFLAKE_AUTHENTICATOR` to one of the following
+Schemachange supports Snowflake's default authenticator, External Oauth, browser-based SSO and programmatic SSO options
+supported by
+the [Snowflake Python Connector](https://docs.snowflake.com/en/user-guide/python-connector-example.html#connecting-to-snowflake).
+Set the environment variable `SNOWFLAKE_AUTHENTICATOR` to one of the following:

Authentication Option | Expected Value
--- | ---
-Default [Password](https://docs.snowflake.com/en/user-guide/python-connector-example.html#connecting-using-the-default-authenticator) Authenticator | `snowflake`
-[Key Pair](https://docs.snowflake.com/en/user-guide/python-connector-example.html#using-key-pair-authentication) Authenticator| `snowflake`
+Default [Password](https://docs.snowflake.com/en/user-guide/python-connector-example.html#connecting-using-the-default-authenticator) Authenticator | `snowflake`
+[Key Pair](https://docs.snowflake.com/en/user-guide/python-connector-example.html#using-key-pair-authentication) Authenticator | `snowflake`
[External Oauth](https://docs.snowflake.com/en/user-guide/oauth-external.html) | `oauth`
[Browser based SSO](https://docs.snowflake.com/en/user-guide/admin-security-fed-auth-use.html#setting-up-browser-based-sso) | `externalbrowser`
-[Programmatic SSO](https://docs.snowflake.com/en/user-guide/admin-security-fed-auth-use.html#native-sso-okta-only) (Okta Only) | Okta URL endpoint for your Okta account typically in the form `https://<okta_account_name>.okta.com` OR `https://<okta_account_name>.oktapreview.com`
+[Programmatic SSO](https://docs.snowflake.com/en/user-guide/admin-security-fed-auth-use.html#native-sso-okta-only) (Okta Only) | Okta URL endpoint for your Okta account, typically in the form `https://<okta_account_name>.okta.com` or `https://<okta_account_name>.oktapreview.com`

-If an authenticator is unsupported, then schemachange will default to `snowflake`. If the authenticator is `snowflake`, and both password and key pair values are provided then schemachange will use the password over the key pair values.
+If an authenticator is unsupported, then schemachange will default to `snowflake`. If the authenticator is `snowflake`
+and both password and key pair values are provided, then schemachange will use the password over the key pair values.

### Password Authentication

-The Snowflake user password for `SNOWFLAKE_USER` is required to be set in the environment variable `SNOWFLAKE_PASSWORD` prior to calling the script. schemachange will fail if the `SNOWFLAKE_PASSWORD` environment variable is not set. The environment variable `SNOWFLAKE_AUTHENTICATOR` will be set to `snowflake` if it not explicitly set.
-_**DEPRECATION NOTICE**: The `SNOWSQL_PWD` environment variable is deprecated but currently still supported. Support for it will be removed in a later version of schemachange. Please use `SNOWFLAKE_PASSWORD` instead._
+The Snowflake user password for `SNOWFLAKE_USER` is required to be set in the environment variable `SNOWFLAKE_PASSWORD`
+prior to calling the script. schemachange will fail if the `SNOWFLAKE_PASSWORD` environment variable is not set. The
+environment variable `SNOWFLAKE_AUTHENTICATOR` will be set to `snowflake` if it is not explicitly set.
+
+_**DEPRECATION NOTICE**: The `SNOWSQL_PWD` environment variable is deprecated but currently still supported. Support for
+it will be removed in a later version of schemachange. Please use `SNOWFLAKE_PASSWORD` instead._
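+
+A minimal password-based invocation could look like this sketch (the account, user, role, warehouse and password
+values are placeholders):
+
+```bash
+export SNOWFLAKE_AUTHENTICATOR=snowflake
+export SNOWFLAKE_PASSWORD='********'
+schemachange deploy -f ./migrations -a my_account -u my_user -r MY_ROLE -w MY_WAREHOUSE
+```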

### Private Key Authentication

-The Snowflake user encrypted private key for `SNOWFLAKE_USER` is required to be in a file with the file path set in the environment variable `SNOWFLAKE_PRIVATE_KEY_PATH`. Additionally, the password for the encrypted private key file is required to be set in the environment variable `SNOWFLAKE_PRIVATE_KEY_PASSPHRASE`. If the variable is not set, schemachange will assume the private key is not encrypted. These two environment variables must be set prior to calling the script. Schemachange will fail if the `SNOWFLAKE_PRIVATE_KEY_PATH` is not set.
+The Snowflake user encrypted private key for `SNOWFLAKE_USER` is required to be in a file with the file path set in the
+environment variable `SNOWFLAKE_PRIVATE_KEY_PATH`. Additionally, the passphrase for the encrypted private key file is
+required to be set in the environment variable `SNOWFLAKE_PRIVATE_KEY_PASSPHRASE`. If the variable is not set,
+schemachange will assume the private key is not encrypted. These two environment variables must be set prior to calling
+the script. Schemachange will fail if `SNOWFLAKE_PRIVATE_KEY_PATH` is not set.

### Oauth Authentication

-An Oauth Configuration can be made in the [YAML Config File](#yaml-config-file) or passing an equivalent json dictionary to the switch `--oauth-config`. Invoke this method by setting the environment variable `SNOWFLAKE_AUTHENTICATOR` to the value `oauth` prior to calling schemachange. Since different Oauth providers may require different information the Oauth configuration uses four named variables that are fed into a POST request to obtain a token. Azure is shown in the example YAML but other providers should use a similar pattern and request payload contents.
+An Oauth configuration can be made in the [YAML Config File](#yaml-config-file) or by passing an equivalent JSON
+dictionary to the switch `--oauth-config`. Invoke this method by setting the environment variable
+`SNOWFLAKE_AUTHENTICATOR` to the value `oauth` prior to calling schemachange. Since different Oauth providers may
+require different information, the Oauth configuration uses four named variables that are fed into a POST request to
+obtain a token. Azure is shown in the example YAML, but other providers should use a similar pattern and request
+payload contents.
+
* token-provider-url
-The URL of the authenticator resource that will receive the POST request.
+  The URL of the authenticator resource that will receive the POST request.
* token-response-name
-The Expected name of the JSON element containing the Token in the return response from the authenticator resource.
+  The expected name of the JSON element containing the token in the response from the authenticator resource.
* token-request-payload
-The Set of variables passed as a dictionary to the `data` element of the request.
+  The set of variables passed as a dictionary to the `data` element of the request.
* token-request-headers
-The Set of variables passed as a dictionary to the `headers` element of the request.
-
-It is recomended to use the YAML file and pass oauth secrets into the configuration using the templating engine instead of the command line option.
+  The set of variables passed as a dictionary to the `headers` element of the request.

+It is recommended to use the YAML file and pass Oauth secrets into the configuration using the templating engine instead
+of the command line option.
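+
+Purely as an illustrative sketch, an Azure-style block in the YAML config could be shaped like the following. The key
+names follow the four settings above; the tenant, client id and scope values, and the `OAUTH_USERNAME` and
+`OAUTH_PASSWORD` environment variables, are placeholders you would replace for your provider:
+
+```yaml
+oauthconfig:
+  token-provider-url: 'https://login.microsoftonline.com/<tenant_id>/oauth2/v2.0/token'
+  token-response-name: 'access_token'
+  token-request-headers:
+    Content-Type: 'application/x-www-form-urlencoded'
+  token-request-payload:
+    client_id: '<client_id>'
+    grant_type: 'password'
+    scope: '<scope>'
+    username: "{{ env_var('OAUTH_USERNAME') }}"
+    password: "{{ env_var('OAUTH_PASSWORD') }}"
+```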

### External Browser Authentication

-External browser authentication can be used for local development by setting the environment variable `SNOWFLAKE_AUTHENTICATOR` to the value `externalbrowser` prior to calling schemachange.
-The client will be prompted to authenticate in a browser that pops up. Refer to the [documentation](https://docs.snowflake.com/en/user-guide/admin-security-fed-auth-use.html#setting-up-browser-based-sso) to cache the token to minimize the number of times the browser pops up to authenticate the user.
+External browser authentication can be used for local development by setting the environment
+variable `SNOWFLAKE_AUTHENTICATOR` to the value `externalbrowser` prior to calling schemachange.
+The client will be prompted to authenticate in a browser that pops up. Refer to
+the [documentation](https://docs.snowflake.com/en/user-guide/admin-security-fed-auth-use.html#setting-up-browser-based-sso)
+to cache the token to minimize the number of times the browser pops up to authenticate the user.

### Okta Authentication

-For clients that do not have a browser, can use the popular SaaS Idp option to connect via Okta. This will require the Okta URL that you utilize for SSO.
-Okta authentication can be used setting the environment variable `SNOWFLAKE_AUTHENTICATOR` to the value of your okta endpoint as a fully formed URL ( E.g. `https://.okta.com`) prior to calling schemachange.
-_** NOTE**: Please disable Okta MFA for the user who uses Native SSO authentication with client drivers. Please consult your Okta administrator for more information._
+Clients that do not have a browser can use the popular SaaS IdP option to connect via Okta. This will require the Okta
+URL that you utilize for SSO.
+Okta authentication can be used by setting the environment variable `SNOWFLAKE_AUTHENTICATOR` to the value of your Okta
+endpoint as a fully formed URL (e.g. `https://<okta_account_name>.okta.com`) prior to calling schemachange.
+
+_**NOTE**: Please disable Okta MFA for the user who uses Native SSO authentication with client drivers. Please consult
+your Okta administrator for more information._

## Configuration

Parameters to schemachange can be supplied in two different ways:
+
1. Through a YAML config file
2. Via command line arguments

If supplied by both the command line and the YAML file, the command line overrides the YAML values.

Additionally, regardless of the approach taken, the following parameters are required to run schemachange:
+
* snowflake-account
* snowflake-user
* snowflake-role
* snowflake-warehouse

-Plese see [Usage Notes for the account Parameter (for the connect Method)](https://docs.snowflake.com/en/user-guide/python-connector-api.html#label-account-format-info) for more details on how to structure the account name.
+Please
+see [Usage Notes for the account Parameter (for the connect Method)](https://docs.snowflake.com/en/user-guide/python-connector-api.html#label-account-format-info)
+for more details on how to structure the account name.

### YAML Config File

-schemachange expects the YAML config file to be named `schemachange-config.yml` and looks for it by default in the current folder. The folder can be overridden by using the `--config-folder` command line argument (see [Command Line Arguments](#command-line-arguments) below for more details).
+schemachange expects the YAML config file to be named `schemachange-config.yml` and looks for it by default in the
+current folder. The folder can be overridden by using the `--config-folder` command line argument
+(see [Command Line Arguments](#command-line-arguments) below for more details).

Here is the list of available configurations in the `schemachange-config.yml` file:

@@ -359,26 +460,34 @@ oauthconfig:
```

#### Yaml Jinja support
-The YAML config file supports the jinja templating language and has a custom function "env_var" to access environmental variables. Jinja variables are unavailable and not yet loaded since they are supplied by the YAML file. Customisation of the YAML file can only happen through values passed via environment variables.
+
+The YAML config file supports the jinja templating language and has a custom function "env_var" to access environmental
+variables. Jinja variables are unavailable and not yet loaded since they are supplied by the YAML file. Customisation of
+the YAML file can only happen through values passed via environment variables.

##### env_var

+
Provides access to environmental variables. The function can be used two different ways.

Return the value of the environmental variable if it exists, otherwise return the default value.
+
```jinja
{{ env_var('<environment_variable>', 'default') }}
```

Return the value of the environmental variable if it exists, otherwise raise an error.
+
```jinja
{{ env_var('<environment_variable>') }}
```

### Command Line Arguments

-Schemachange supports a few subcommands. If the subcommand is not provided it defaults to deploy. This behaviour keeps compatibility with versions prior to 3.2.
+Schemachange supports a few subcommands. If the subcommand is not provided, it defaults to `deploy`. This behaviour
+keeps compatibility with versions prior to 3.2.

#### deploy
+
This is the main command that runs the deployment process.

`usage: schemachange deploy [-h] [--config-folder CONFIG_FOLDER] [-f ROOT_FOLDER] [-m MODULES_FOLDER] [-a SNOWFLAKE_ACCOUNT] [-u SNOWFLAKE_USER] [-r SNOWFLAKE_ROLE] [-w SNOWFLAKE_WAREHOUSE] [-d SNOWFLAKE_DATABASE] [-s SNOWFLAKE_SCHEMA] [-c CHANGE_HISTORY_TABLE] [--vars VARS] [--create-change-history-table] [-ac] [-v] [--dry-run] [--query-tag QUERY_TAG]`

@@ -405,7 +514,9 @@ This is the main command that runs the deployment process.
| --oauth-config | Define values for the variables to make Oauth token requests (e.g. {"token-provider-url": "https://...", "token-request-payload": {"client_id": "GUID_xyz",...},...}) |

#### render

-This subcommand is used to render a single script to the console. It is intended to support the development and troubleshooting of script that use features from the jinja template engine.
+
+This subcommand is used to render a single script to the console. It is intended to support the development and
+troubleshooting of scripts that use features from the jinja template engine.

`usage: schemachange render [-h] [--config-folder CONFIG_FOLDER] [-f ROOT_FOLDER] [-m MODULES_FOLDER] [--vars VARS] [-v] script`

@@ -424,16 +535,27 @@ This subcommand is used to render a single script to the console. It is intended

In order to run schemachange you must have the following:

* You will need to have a recent version of python 3 installed
-* You will need to have the latest [Snowflake Python driver installed](https://docs.snowflake.com/en/user-guide/python-connector-install.html)
-* You will need to create the change history table used by schemachange in Snowflake (see [Change History Table](#change-history-table) above for more details)
- * First, you will need to create a database to store your change history table (schemachange will not help you with this). For your convenience, [initialize.sql file](demo/provision/initialize.sql) has been provided to get you started. Feel free to align the script to your organizations RBAC implementation. The [setup_schemachange_schema.sql](demo/provision/setup_schemachange_schema.sql) file is provided to set up the target schema that will host the change history table for each of the demo projects in this repo. Use it as a means to test the required permissions and connectivity in your local setup.
- * Second, you will need to create the change history schema and table.
You can do this manually (see [Change History Table](#change-history-table) above for the DDL) or have schemachange create them by running it with the `--create-change-history-table` parameter (just make sure the Snowflake user you're running schemachange with has privileges to create a schema and table in that database) +* You will need to have the + latest [Snowflake Python driver installed](https://docs.snowflake.com/en/user-guide/python-connector-install.html) +* You will need to create the change history table used by schemachange in Snowflake ( + see [Change History Table](#change-history-table) above for more details) + * First, you will need to create a database to store your change history table (schemachange will not help you with + this). For your convenience, [initialize.sql file](demo/provision/initialize.sql) has been provided to get you + started. Feel free to align the script to your organizations RBAC implementation. + The [setup_schemachange_schema.sql](demo/provision/setup_schemachange_schema.sql) file is provided to set up the + target schema that will host the change history table for each of the demo projects in this repo. Use it as a + means to test the required permissions and connectivity in your local setup. + * Second, you will need to create the change history schema and table. You can do this manually ( + see [Change History Table](#change-history-table) above for the DDL) or have schemachange create them by running + it with the `--create-change-history-table` parameter (just make sure the Snowflake user you're running + schemachange with has privileges to create a schema and table in that database) * You will need to create (or choose) a user account that has privileges to apply the changes in your change script * Don't forget that this user also needs the SELECT and INSERT privileges on the change history table ### Running the Script -schemachange is a single python script located at [schemachange/cli.py](schemachange/cli.py). It can be executed as follows: +schemachange is a single python script located at [schemachange/cli.py](schemachange/cli.py). It can be executed as +follows: ``` python schemachange/cli.py [-h] [--config-folder CONFIG_FOLDER] [-f ROOT_FOLDER] [-a SNOWFLAKE_ACCOUNT] [-u SNOWFLAKE_USER] [-r SNOWFLAKE_ROLE] [-w SNOWFLAKE_WAREHOUSE] [-d SNOWFLAKE_DATABASE] [-s SNOWFLAKE_SCHEMA] [-c CHANGE_HISTORY_TABLE] [--vars VARS] [--create-change-history-table] [-ac] [-v] [--dry-run] [--query-tag QUERY_TAG] [--oauth-config OUATH_CONFIG] @@ -445,19 +567,27 @@ Or if installed via `pip`, it can be executed as follows: schemachange [-h] [--config-folder CONFIG_FOLDER] [-f ROOT_FOLDER] [-a SNOWFLAKE_ACCOUNT] [-u SNOWFLAKE_USER] [-r SNOWFLAKE_ROLE] [-w SNOWFLAKE_WAREHOUSE] [-d SNOWFLAKE_DATABASE] [-s SNOWFLAKE_SCHEMA] [-c CHANGE_HISTORY_TABLE] [--vars VARS] [--create-change-history-table] [-ac] [-v] [--dry-run] [--query-tag QUERY_TAG] [--oauth-config OUATH_CONFIG] ``` -The [demo](demo) folder in this project repository contains three schemachange demo projects for you to try out. These demos showcase the basics and a couple of advanced examples based on the standard Snowflake Citibike demo which can be found in [the Snowflake Hands-on Lab](https://docs.snowflake.net/manuals/other-resources.html#hands-on-lab). Check out each demo listed below +The [demo](demo) folder in this project repository contains three schemachange demo projects for you to try out. 
These +demos showcase the basics and a couple of advanced examples based on the standard Snowflake Citibike demo which can be +found in [the Snowflake Hands-on Lab](https://docs.snowflake.net/manuals/other-resources.html#hands-on-lab). Check out +each demo listed below - [Basics Demo](demo/basics_demo): Used to test the basic schemachange functionality. -- [Citibike Demo](demo/citibike_demo): Used to show a simple example of building a database and loading data using schemachange. -- [Citibike Jinja Demo](demo/citibike_demo_jinja): Extends the citibike demo to showcase the use of macros and jinja templating. +- [Citibike Demo](demo/citibike_demo): Used to show a simple example of building a database and loading data using + schemachange. +- [Citibike Jinja Demo](demo/citibike_demo_jinja): Extends the citibike demo to showcase the use of macros and jinja + templating. -The [Citibike data](https://www.citibikenyc.com/system-data) for this demo comes from the NYC Citi Bike bike share program. +The [Citibike data](https://www.citibikenyc.com/system-data) for this demo comes from the NYC Citi Bike bike share +program. To get started with schemachange and these demo scripts follow these steps: + 1. Make sure you've completed the [Prerequisites](#prerequisites) steps above 1. Get a copy of this schemachange repository (either via a clone or download) 1. Open a shell and change directory to your copy of the schemachange repository -1. Run schemachange (see [Running the Script](#running-the-script) above) with your Snowflake account details and respective demo project as the root folder (make sure you use the full path) +1. Run schemachange (see [Running the Script](#running-the-script) above) with your Snowflake account details and + respective demo project as the root folder (make sure you use the full path) ## Integrating With DevOps @@ -469,13 +599,15 @@ Here is a sample DevOps development lifecycle with schemachange: ### Using in a CI/CD Pipeline -If your build agent has a recent version of python 3 installed, the script can be ran like so: +If your build agent has a recent version of python 3 installed, the script can be run like so: + ```bash pip install schemachange --upgrade schemachange [-h] [-f ROOT_FOLDER] -a SNOWFLAKE_ACCOUNT -u SNOWFLAKE_USER -r SNOWFLAKE_ROLE -w SNOWFLAKE_WAREHOUSE [-d SNOWFLAKE_DATABASE] [-s SNOWFLAKE_SCHEMA] [-c CHANGE_HISTORY_TABLE] [--vars VARS] [--create-change-history-table] [-ac] [-v] [--dry-run] [--query-tag QUERY_TAG] [--oauth-config OUATH_CONFIG] ``` Or if you prefer docker, set the environment variables and run like so: + ```bash docker run -it --rm \ --name schemachange-script \ @@ -497,10 +629,13 @@ Either way, don't forget to set the `SNOWFLAKE_PASSWORD` environment variable if - James Weakley (@jamesweakley) - Jeremiah Hansen (@jeremiahhansen) -This is a community-developed tool, not an official Snowflake offering. It comes with no support or warranty. However, feel free to raise a GitHub issue if you find a bug or would like a new feature. +This is a community-developed tool, not an official Snowflake offering. It comes with no support or warranty. However, +feel free to raise a GitHub issue if you find a bug or would like a new feature. ## Third Party Packages -The current functionality in schemachange would not be possible without the following third party packages and all those that maintain and have contributed. 
+
+The current functionality in schemachange would not be possible without the following third party packages and all
+those that maintain and have contributed.

| Name | License | Author | URL |
|----------------------------|-------------------------|------------------------------------------------------------------------------------------------------------------|--------------------------------------|

@@ -512,6 +647,10 @@

## Legal

-Licensed under the Apache License, Version 2.0 (the "License"); you may not use this tool except in compliance with the License. You may obtain a copy of the License at: [http://www.apache.org/licenses/LICENSE-2.0](http://www.apache.org/licenses/LICENSE-2.0)
+Licensed under the Apache License, Version 2.0 (the "License"); you may not use this tool except in compliance with the
+License. You may obtain a copy of the License
+at: [http://www.apache.org/licenses/LICENSE-2.0](http://www.apache.org/licenses/LICENSE-2.0)

-Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License.
+Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific
+language governing permissions and limitations under the License.

diff --git a/demo/README.MD b/demo/README.MD
index 86a6a2ec..24fc2624 100644
--- a/demo/README.MD
+++ b/demo/README.MD
@@ -1,37 +1,59 @@
# Demo

-The contents of this Demo serves two audiences - Consumers and Contributors. For the Consumer, the demo provides a basis to see how schemachange works with the main feature set. For the contributor, who forks the repo and submits PRs to the codebase, this will serve as a basis to test the PR against your own snowflake account to ensure your code change does not break any existing functionality.
+The contents of this Demo serve two audiences - Consumers and Contributors. For the Consumer, the demo provides a basis
+to see how schemachange works with the main feature set. For the contributor, who forks the repo and submits PRs to the
+codebase, this will serve as a basis to test the PR against your own Snowflake account to ensure your code change does
+not break any existing functionality.

-## Prequisite
+## Prerequisite

- You will need your own snowflake Account to test the Demo - Both as a contributor and consumer.
-- You will need to review and run statements in the provision folder or setup your own database and schema.
- - [initialize.sql](provision/initialize.sql): Contains the SQL variables to initialize your environment.
- - [setup_schemachange_schema.sql](provision/setup_schemachange_schema.sql): Contains the SQL variables to track the individual demo scenarios in its own change history table.
+- You will need to review and run statements in the provision folder or set up your own database and schema.
+  - [initialize.sql](provision/initialize.sql): Contains the SQL variables to initialize your environment.
+  - [setup_schemachange_schema.sql](provision/setup_schemachange_schema.sql): Contains the SQL variables to track the
+    individual demo scenarios in its own change history table.
### Contributors

-As a contributor, you will have to set up schemachange demo database and schemachange schema (See Initialize and Setup scripts below). Along with that you will also set up the following Secrets in your forked repository so that the github actions can setup, test and teardown the temporary schema it creates to test the changes to your code in the master and dev branches respectively.
+
+As a contributor, you will have to set up the schemachange demo database and schemachange schema (see the Initialize
+and Setup scripts below). Along with that, you will also set up the following Secrets in your forked repository so that
+the GitHub actions can set up, test and tear down the temporary schema it creates to test the changes to your code in
+the master and dev branches respectively.
+
- SCHEMACHANGE_SNOWFLAKE_PASSWORD
- SCHEMACHANGE_SNOWFLAKE_USER
- SCHEMACHANGE_SNOWFLAKE_ACCOUNT

### Consumers

+
-- If you are consumer who is installing schemachange and wants to test-run the demo, then you will have to set the following environment variables.
+- If you are a consumer who is installing schemachange and wants to test-run the demo, then you will have to set the
+  following environment variables.
  - SNOWFLAKE_ACCOUNT: This will be the account identifier for your snowflake account.
  - SNOWFLAKE_USER: This will be the user that will connect to your snowflake account.
  - SNOWFLAKE_PASSWORD: This is the password for the user (SNOWFLAKE_USER) that will connect to the snowflake account.
-  - SCENARIO_NAME: This will be demo folder you intend to experiment with. For starters, `basics_demo`, `citibike_demo` or `citibike_demo_jinja` are included with the repo that will set the root folder value in the respective schemachange-config.yml file.
-  - SNOWFLAKE_WAREHOUSE: This will be the warehouse you setup for the demo. Default setup is SCHEMACHANGE_DEMO_WH
+  - SCENARIO_NAME: This will be the demo folder you intend to experiment with. For
+    starters, `basics_demo`, `citibike_demo` or `citibike_demo_jinja` are included with the repo and will set the
+    root folder value in the respective schemachange-config.yml file.
+  - SNOWFLAKE_WAREHOUSE: This will be the warehouse you set up for the demo. Default setup is SCHEMACHANGE_DEMO_WH
  - SNOWFLAKE_DATABASE Keyed to SCHEMACHANGE_DEMO
  - SNOWFLAKE_ROLE Keyed to SCHEMACHANGE_DEMO-DEPLOY

-The scripts in the `provision` folder can be used to setup up your demo database along with a schema in that database that will house the change tracking tables needed to setup and teardown the schemas used to test a working version of the demo DDL scripts.
+The scripts in the `provision` folder can be used to set up your demo database along with a schema in that database
+that will house the change tracking tables needed to set up and tear down the schemas used to test a working version of
+the demo DDL scripts.

-- The [initialize](provision/initialize.sql) script setups the database, warehouse and account level access roles that will be used on the
-- The [setup](provision/setup_schemachange_schema.sql) script creates the `SCHEMACHANGE` schema in the database that you created in the initialize step.
+- The [initialize](provision/initialize.sql) script sets up the database, warehouse and account level access roles that
+  will be used in the demo.
+- The [setup](provision/setup_schemachange_schema.sql) script creates the `SCHEMACHANGE` schema in the database that
+  you created in the initialize step.
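+
+For a quick start as a consumer, the environment could be prepared like this sketch (the account, user and password
+values are placeholders; the warehouse, database and role defaults are the ones listed above):
+
+```bash
+export SNOWFLAKE_ACCOUNT='myorg-myaccount'
+export SNOWFLAKE_USER='my_user'
+export SNOWFLAKE_PASSWORD='********'
+export SCENARIO_NAME='basics_demo'
+export SNOWFLAKE_WAREHOUSE='SCHEMACHANGE_DEMO_WH'
+export SNOWFLAKE_DATABASE='SCHEMACHANGE_DEMO'
+export SNOWFLAKE_ROLE='SCHEMACHANGE_DEMO-DEPLOY'
+```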
# Setup -The setup scripts are included to build the schema needed by the GitHub Actions Workflow to avoid conflict across jobs when tested in parallel. The Setup script will create a new schema to run the schemachange script for the corresponding scenario. + +The setup scripts are included to build the schema needed by the GitHub Actions Workflow to avoid conflict across jobs +when tested in parallel. The Setup script will create a new schema to run the schemachange script for the corresponding +scenario. # Teardown -The Teardown scripts are the bookend pairing of the Setup script for each scenario so that when the build process is done using github actions, you will have a log of the testing done to ensure that schemachange is working as expected. + +The Teardown scripts are the bookend pairing of the Setup script for each scenario so that when the build process is +done using GitHub actions, you will have a log of the testing done to ensure that schemachange is working as expected. diff --git a/requirements.txt b/requirements.txt index a151cfb5..8ba08504 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,4 @@ Jinja2~=3.0 PyYAML~=6.0 snowflake-connector-python>=2.8,<4.0 +structlog~=24.1.0 diff --git a/schemachange/JinjaEnvVar.py b/schemachange/JinjaEnvVar.py new file mode 100644 index 00000000..c327f6ed --- /dev/null +++ b/schemachange/JinjaEnvVar.py @@ -0,0 +1,33 @@ +from __future__ import annotations + +import os + +import jinja2.ext + + +class JinjaEnvVar(jinja2.ext.Extension): + """ + Extends Jinja Templates with access to environmental variables + """ + + def __init__(self, environment: jinja2.Environment): + super().__init__(environment) + + # add globals + environment.globals["env_var"] = JinjaEnvVar.env_var + + @staticmethod + def env_var(env_var: str, default: str | None = None) -> str: + """ + Returns the value of the environmental variable or the default. 
+ """ + result = default + if env_var in os.environ: + result = os.environ[env_var] + + if result is None: + raise ValueError( + f"Could not find environmental variable {env_var} and no default value was provided" + ) + + return result diff --git a/schemachange/JinjaTemplateProcessor.py b/schemachange/JinjaTemplateProcessor.py new file mode 100644 index 00000000..7c4daa6b --- /dev/null +++ b/schemachange/JinjaTemplateProcessor.py @@ -0,0 +1,57 @@ +from __future__ import annotations + +from pathlib import Path +from typing import Any + +import jinja2 +import jinja2.ext +import structlog +from jinja2.loaders import BaseLoader + +from schemachange.JinjaEnvVar import JinjaEnvVar + +logger = structlog.getLogger(__name__) + + +class JinjaTemplateProcessor: + _env_args = { + "undefined": jinja2.StrictUndefined, + "autoescape": False, + "extensions": [JinjaEnvVar], + } + + def __init__(self, project_root: Path, modules_folder: Path = None): + loader: BaseLoader + if modules_folder: + loader = jinja2.ChoiceLoader( + [ + jinja2.FileSystemLoader(project_root), + jinja2.PrefixLoader( + {"modules": jinja2.FileSystemLoader(modules_folder)} + ), + ] + ) + else: + loader = jinja2.FileSystemLoader(project_root) + self.__environment = jinja2.Environment(loader=loader, **self._env_args) + self.__project_root = project_root + + def list(self): + return self.__environment.list_templates() + + def override_loader(self, loader: jinja2.BaseLoader): + # to make unit testing easier + self.__environment = jinja2.Environment(loader=loader, **self._env_args) + + def render(self, script: str, variables: dict[str, Any] | None) -> str: + if not variables: + variables = {} + # jinja needs posix path + posix_path = Path(script).as_posix() + template = self.__environment.get_template(posix_path) + content = template.render(**variables).strip() + content = content[:-1] if content.endswith(";") else content + return content + + def relpath(self, file_path: Path): + return file_path.relative_to(self.__project_root) diff --git a/schemachange/__init__.py b/schemachange/__init__.py index e69de29b..5e44d4e8 100644 --- a/schemachange/__init__.py +++ b/schemachange/__init__.py @@ -0,0 +1,15 @@ +import logging + +import structlog + +structlog.configure( + wrapper_class=structlog.make_filtering_bound_logger(logging.INFO), + processors=[ + structlog.contextvars.merge_contextvars, + structlog.processors.add_log_level, + structlog.processors.TimeStamper(fmt="iso"), + structlog.stdlib.PositionalArgumentsFormatter(), + structlog.processors.dict_tracebacks, + structlog.dev.ConsoleRenderer(), + ], +) diff --git a/schemachange/cli.py b/schemachange/cli.py index 523a1968..d6fe54f0 100644 --- a/schemachange/cli.py +++ b/schemachange/cli.py @@ -1,1312 +1,75 @@ -import argparse import hashlib -import json -import os -import pathlib -import re -import sys -import textwrap -import time -import warnings -from typing import Dict, Any, Optional, Set -import jinja2 -import jinja2.ext -import requests -import snowflake.connector -import yaml -from cryptography.hazmat.backends import default_backend -from cryptography.hazmat.primitives import serialization -from jinja2.loaders import BaseLoader +from pathlib import Path -# region Global Variables -# metadata -_schemachange_version = "3.7.0" -_config_file_name = "schemachange-config.yml" -_metadata_database_name = "METADATA" -_metadata_schema_name = "SCHEMACHANGE" -_metadata_table_name = "CHANGE_HISTORY" -_snowflake_application_name = "schemachange" - -# messages -_err_jinja_env_var = ( - "Could not find 
environmental variable %s and no default" + " value was provided" -) -_err_oauth_tk_nm = "Response Json contains keys: {keys} \n but not {key}" -_err_oauth_tk_err = "\n error description: {desc}" -_err_no_auth_mthd = ( - "Unable to find connection credentials for Okta, private key, " - + "password, Oauth or Browser authentication" -) -_err_unsupported_auth_mthd = ( - "'{unsupported_authenticator}' is not supported authenticator option. " - + "Choose from externalbrowser, oauth, https://.okta.com. Using default value = 'snowflake'" -) -_warn_password = ( - "The SNOWSQL_PWD environment variable is deprecated and will" - + " be removed in a later version of schemachange. Please use SNOWFLAKE_PASSWORD instead." -) -_warn_password_dup = ( - "Environment variables SNOWFLAKE_PASSWORD and SNOWSQL_PWD are " - + " both present, using SNOWFLAKE_PASSWORD" -) -_err_args_missing = ( - "Missing config values. The following config values are required: %s " -) -_err_env_missing = ( - "Missing environment variable(s). \nSNOWFLAKE_PASSWORD must be defined for " - + "password authentication. \nSNOWFLAKE_PRIVATE_KEY_PATH and (optional) " - + "SNOWFLAKE_PRIVATE_KEY_PASSPHRASE must be defined for private key authentication. " - + "\nSNOWFLAKE_AUTHENTICATOR must be defined is using Oauth, OKTA or external Browser Authentication." -) -_log_config_details = ( - "Using Snowflake account {snowflake_account}\nUsing default role " - + "{snowflake_role}\nUsing default warehouse {snowflake_warehouse}\nUsing default " - + "database {snowflake_database}\n" - + "schema {snowflake_schema}" -) -_log_ch_use = ( - "Using change history table {database_name}.{schema_name}.{table_name} " - + "(last altered {last_altered})" -) -_log_ch_create = ( - "Created change history table {database_name}.{schema_name}.{table_name}" -) -_err_ch_missing = ( - "Unable to find change history table {database_name}.{schema_name}.{table_name}" -) -_log_ch_max_version = ( - "Max applied change script version: {max_published_version_display}" -) -_log_skip_v = ( - "Skipping change script {script_name} because it's older than the most recently " - + "applied change ({max_published_version})" -) -_log_skip_r = ( - "Skipping change script {script_name} because there is no change since the last " - + "execution" -) -_log_apply = "Applying change script {script_name}" -_log_apply_set_complete = ( - "Successfully applied {scripts_applied} change scripts (skipping " - + "{scripts_skipped}) \nCompleted successfully" -) -_err_vars_config = "vars did not parse correctly, please check its configuration" -_err_vars_reserved = ( - "The variable schemachange has been reserved for use by schemachange, " - + "please use a different name" -) -_err_invalid_folder = "Invalid {folder_type} folder: {path}" -_err_dup_scripts = ( - "The script name {script_name} exists more than once (first_instance " - + "{first_path}, second instance {script_full_path})" -) -_err_dup_scripts_version = ( - "The script version {script_version} exists more than once " - + "(second instance {script_full_path})" -) -_err_invalid_cht = "Invalid change history table name: %s" -_log_auth_type = "Proceeding with %s authentication" -_log_pk_enc = "No private key passphrase provided. Assuming the key is not encrypted." 
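The printf-style message constants being deleted in this block are superseded by the structlog configuration added in schemachange/__init__.py above: context now travels as key-value pairs on each log event instead of being interpolated into a template string. A minimal sketch of the equivalent call — the event name and keys here are illustrative, not necessarily the exact ones the new modules emit:

```python
import structlog

logger = structlog.getLogger(__name__)

# Formerly: print(_log_ch_use.format(database_name=..., schema_name=..., ...))
# The ConsoleRenderer configured in schemachange/__init__.py formats the pairs.
logger.info(
    "Using change history table",
    database_name="METADATA",
    schema_name="SCHEMACHANGE",
    table_name="CHANGE_HISTORY",
)
```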
-_log_okta_ep = "Okta Endpoint: %s" -_log_current_session_id = "Current session ID: {current_session_id}" - -# endregion Global Variables - - -class JinjaEnvVar(jinja2.ext.Extension): - """ - Extends Jinja Templates with access to environmental variables - """ - - def __init__(self, environment: jinja2.Environment): - super().__init__(environment) - - # add globals - environment.globals["env_var"] = JinjaEnvVar.env_var - - @staticmethod - def env_var(env_var: str, default: Optional[str] = None) -> str: - """ - Returns the value of the environmental variable or the default. - """ - result = default - if env_var in os.environ: - result = os.environ[env_var] - - if result is None: - raise ValueError(_err_jinja_env_var % env_var) - - return result - - -class JinjaTemplateProcessor: - _env_args = { - "undefined": jinja2.StrictUndefined, - "autoescape": False, - "extensions": [JinjaEnvVar], - } - - def __init__(self, project_root: str, modules_folder: str = None): - loader: BaseLoader - if modules_folder: - loader = jinja2.ChoiceLoader( - [ - jinja2.FileSystemLoader(project_root), - jinja2.PrefixLoader( - {"modules": jinja2.FileSystemLoader(modules_folder)} - ), - ] - ) - else: - loader = jinja2.FileSystemLoader(project_root) - self.__environment = jinja2.Environment(loader=loader, **self._env_args) - self.__project_root = project_root - - def list(self): - return self.__environment.list_templates() - - def override_loader(self, loader: jinja2.BaseLoader): - # to make unit testing easier - self.__environment = jinja2.Environment(loader=loader, **self._env_args) - - def render(self, script: str, vars: Dict[str, Any], verbose: bool) -> str: - if not vars: - vars = {} - # jinja needs posix path - posix_path = pathlib.Path(script).as_posix() - template = self.__environment.get_template(posix_path) - content = template.render(**vars).strip() - content = content[:-1] if content.endswith(";") else content - return content - - def relpath(self, file_path: str): - return os.path.relpath(file_path, self.__project_root) - - -class SecretManager: - """ - Provides the ability to redact secrets - """ - - __singleton: "SecretManager" - - @staticmethod - def get_global_manager() -> "SecretManager": - return SecretManager.__singleton - - @staticmethod - def set_global_manager(global_manager: "SecretManager"): - SecretManager.__singleton = global_manager - - @staticmethod - def global_redact(context: str) -> str: - """ - redacts any text that has been classified a secret - using the global SecretManager instance. 
- """ - return SecretManager.__singleton.redact(context) - - def __init__(self): - self.__secrets = set() - - def clear(self): - self.__secrets = set() - - def add(self, secret: str): - if secret: - self.__secrets.add(secret) - - def add_range(self, secrets: Set[str]): - if secrets: - self.__secrets = self.__secrets | secrets - - def redact(self, context: str) -> str: - """ - redacts any text that has been classified a secret - """ - redacted = context - if redacted: - for secret in self.__secrets: - redacted = redacted.replace(secret, "*" * len(secret)) - return redacted - - -class SnowflakeSchemachangeSession: - """ - Manages Snowflake Interactions and authentication - """ - - # region Query Templates - _q_ch_metadata = ( - "SELECT CREATED, LAST_ALTERED FROM {database_name}.INFORMATION_SCHEMA.TABLES" - + " WHERE TABLE_SCHEMA = REPLACE('{schema_name}','\"','') AND TABLE_NAME = replace('{table_name}','\"','')" - ) - _q_ch_schema_present = ( - "SELECT COUNT(1) FROM {database_name}.INFORMATION_SCHEMA.SCHEMATA" - + " WHERE SCHEMA_NAME = REPLACE('{schema_name}','\"','')" - ) - _q_ch_ddl_schema = "CREATE SCHEMA IF NOT EXISTS {schema_name}" - _q_ch_ddl_table = ( - "CREATE TABLE IF NOT EXISTS {database_name}.{schema_name}.{table_name} (VERSION VARCHAR, " - + "DESCRIPTION VARCHAR, SCRIPT VARCHAR, SCRIPT_TYPE VARCHAR, CHECKSUM VARCHAR," - + " EXECUTION_TIME NUMBER, STATUS VARCHAR, INSTALLED_BY VARCHAR, INSTALLED_ON TIMESTAMP_LTZ)" - ) - _q_ch_r_checksum = ( - "SELECT DISTINCT SCRIPT, FIRST_VALUE(CHECKSUM) OVER (PARTITION BY SCRIPT " - + "ORDER BY INSTALLED_ON DESC) FROM {database_name}.{schema_name}.{table_name} WHERE SCRIPT_TYPE = 'R' AND " - + "STATUS = 'Success'" - ) - _q_ch_fetch = ( - "SELECT VERSION FROM {database_name}.{schema_name}.{table_name} WHERE SCRIPT_TYPE = 'V' ORDER" - + " BY INSTALLED_ON DESC LIMIT 1" - ) - _q_sess_tag = "ALTER SESSION SET QUERY_TAG = '{query_tag}'" - _q_ch_log = ( - "INSERT INTO {database_name}.{schema_name}.{table_name} (VERSION, DESCRIPTION, SCRIPT, SCRIPT_TYPE, " - + "CHECKSUM, EXECUTION_TIME, STATUS, INSTALLED_BY, INSTALLED_ON) values ('{script_version}'," - + "'{script_description}','{script_name}','{script_type}','{checksum}',{execution_time}," - + "'{status}','{user}',CURRENT_TIMESTAMP);" - ) - _q_set_sess_role = "USE ROLE IDENTIFIER('{role}');" - _q_set_sess_database = "USE DATABASE IDENTIFIER('{database}');" - _q_set_sess_schema = "USE SCHEMA IDENTIFIER('{schema}');" - _q_set_sess_warehouse = "USE WAREHOUSE IDENTIFIER('{warehouse}');" - # endregion Query Templates - - def __init__(self, config): - # Retreive Connection info from config dictionary - self.conArgs = self.get_snowflake_params(config) - - self.oauth_config = config["oauth_config"] - self.autocommit = config["autocommit"] - self.verbose = config["verbose"] - if self.set_connection_args(): - print(self._q_set_sess_role.format(**self.conArgs)) - print(self._q_set_sess_warehouse.format(**self.conArgs)) - print(self._q_set_sess_database.format(**self.conArgs)) - print(self._q_set_sess_schema.format(**self.conArgs)) - self.con = snowflake.connector.connect(**self.conArgs) - print( - _log_current_session_id.format(current_session_id=self.con.session_id) - ) - # Setting session context - - if not self.autocommit: - self.con.autocommit(False) - else: - print(_err_env_missing) - - def __del__(self): - if hasattr(self, "con"): - self.con.close() - - def get_snowflake_params(self, config): - session_parameters = {"QUERY_TAG": "schemachange %s" % _schemachange_version} - if config["query_tag"]: - 
session_parameters["QUERY_TAG"] += ";%s" % config["query_tag"] - - return { - "user": config["snowflake_user"], - "account": config["snowflake_account"], - "role": get_snowflake_identifier_string( - config["snowflake_role"], "snowflake_role" - ), - "warehouse": get_snowflake_identifier_string( - config["snowflake_warehouse"], "snowflake_warehouse" - ), - "database": get_snowflake_identifier_string( - config["snowflake_database"], "snowflake_database" - ), - "schema": get_snowflake_identifier_string( - config["snowflake_schema"], "snowflake_schema" - ), - "application": _snowflake_application_name, - "session_parameters": session_parameters, - } - - def get_oauth_token(self): - req_info = { - "url": self.oauth_config["token-provider-url"], - "headers": self.oauth_config["token-request-headers"], - "data": self.oauth_config["token-request-payload"], - } - token_name = self.oauth_config["token-response-name"] - response = requests.post(**req_info) - resJsonDict = json.loads(response.text) - try: - return resJsonDict[token_name] - except KeyError: - errormessage = _err_oauth_tk_nm.format( - keys=", ".join(resJsonDict.keys()), key=token_name - ) - # if there is an error passed with the reponse include that - if "error_description" in resJsonDict.keys(): - errormessage += _err_oauth_tk_err.format( - desc=resJsonDict["error_description"] - ) - raise KeyError(errormessage) +import structlog +from structlog import BoundLogger - def set_connection_args(self): - # Password authentication is the default - snowflake_password = None - default_authenticator = "snowflake" - if os.getenv("SNOWFLAKE_PASSWORD") is not None and os.getenv( - "SNOWFLAKE_PASSWORD" - ): - snowflake_password = os.getenv("SNOWFLAKE_PASSWORD") +from schemachange.JinjaTemplateProcessor import JinjaTemplateProcessor +from schemachange.config.RenderConfig import RenderConfig +from schemachange.config.get_merged_config import get_merged_config +from schemachange.deploy import deploy +from schemachange.redact_config_secrets import redact_config_secrets +from schemachange.session.SnowflakeSession import get_session_from_config - # Check legacy/deprecated env variable - if os.getenv("SNOWSQL_PWD") is not None and os.getenv("SNOWSQL_PWD"): - if snowflake_password: - warnings.warn(_warn_password_dup, DeprecationWarning) - else: - warnings.warn(_warn_password, DeprecationWarning) - snowflake_password = os.getenv("SNOWSQL_PWD") - - snowflake_authenticator = os.getenv("SNOWFLAKE_AUTHENTICATOR") - - if snowflake_authenticator: - # Determine the type of Authenticator - # OAuth based authentication - if snowflake_authenticator.lower() == "oauth": - oauth_token = self.get_oauth_token() - - if self.verbose: - print(_log_auth_type % "Oauth Access Token") - - self.conArgs["token"] = oauth_token - self.conArgs["authenticator"] = "oauth" - # External Browswer based SSO - elif snowflake_authenticator.lower() == "externalbrowser": - self.conArgs["authenticator"] = "externalbrowser" - if self.verbose: - print(_log_auth_type % "External Browser") - - # IDP based Authentication, limited to Okta - elif snowflake_authenticator.lower()[:8] == "https://": - if self.verbose: - print(_log_auth_type % "Okta") - print(_log_okta_ep % snowflake_authenticator) - - self.conArgs["password"] = snowflake_password - self.conArgs["authenticator"] = snowflake_authenticator.lower() - - elif snowflake_authenticator.lower() == "snowflake": - self.conArgs["authenticator"] = default_authenticator - # if authenticator is not a supported method or the authenticator variable is 
defined but not specified - else: - # defaulting to snowflake as authenticator - if self.verbose: - print( - _err_unsupported_auth_mthd.format( - unsupported_authenticator=snowflake_authenticator - ) - ) - self.conArgs["authenticator"] = default_authenticator - else: - # default authenticator to snowflake - self.conArgs["authenticator"] = default_authenticator - - if self.conArgs["authenticator"].lower() == default_authenticator: - # Giving preference to password based authentication when both private key and password are specified. - if snowflake_password: - if self.verbose: - print(_log_auth_type % "password") - self.conArgs["password"] = snowflake_password - - elif os.getenv("SNOWFLAKE_PRIVATE_KEY_PATH", ""): - if self.verbose: - print(_log_auth_type % "private key") - - private_key_password = os.getenv("SNOWFLAKE_PRIVATE_KEY_PASSPHRASE", "") - if private_key_password: - private_key_password = private_key_password.encode() - else: - private_key_password = None - if self.verbose: - print(_log_pk_enc) - with open(os.environ["SNOWFLAKE_PRIVATE_KEY_PATH"], "rb") as key: - p_key = serialization.load_pem_private_key( - key.read(), - password=private_key_password, - backend=default_backend(), - ) - - pkb = p_key.private_bytes( - encoding=serialization.Encoding.DER, - format=serialization.PrivateFormat.PKCS8, - encryption_algorithm=serialization.NoEncryption(), - ) - - self.conArgs["private_key"] = pkb - else: - raise NameError(_err_no_auth_mthd) - - return True - - def execute_snowflake_query(self, query): - if self.verbose: - print(SecretManager.global_redact("SQL query: %s" % query)) - try: - res = self.con.execute_string(query) - if not self.autocommit: - self.con.commit() - return res - except Exception as e: - if not self.autocommit: - self.con.rollback() - raise e - - def fetch_change_history_metadata(self, change_history_table): - # This should only ever return 0 or 1 rows - query = self._q_ch_metadata.format(**change_history_table) - results = self.execute_snowflake_query(query) - - # Collect all the results into a list - change_history_metadata = dict() - for cursor in results: - for row in cursor: - change_history_metadata["created"] = row[0] - change_history_metadata["last_altered"] = row[1] - - return change_history_metadata - - def create_change_history_table_if_missing(self, change_history_table): - # Check if schema exists - query = self._q_ch_schema_present.format(**change_history_table) - results = self.execute_snowflake_query(query) - schema_exists = False - for cursor in results: - for row in cursor: - schema_exists = row[0] > 0 - - # Create the schema if it doesn't exist - if not schema_exists: - query = self._q_ch_ddl_schema.format(**change_history_table) - self.execute_snowflake_query(query) - - # Finally, create the change history table if it doesn't exist - query = self._q_ch_ddl_table.format(**change_history_table) - self.execute_snowflake_query(query) - - def fetch_r_scripts_checksum(self, change_history_table) -> Dict[str, str]: - """ - Fetches the checksum of the last executed R script from the change history table. 
- return: a dictionary with the script name as key and the last successfully installed script checksum as value - """ - # Note: Query only fetches last successfully installed checksum for R scripts - query = self._q_ch_r_checksum.format(**change_history_table) - results = self.execute_snowflake_query(query) - - # Collect all the results into a dict - d_script_checksum = {} - for cursor in results: - for row in cursor: - d_script_checksum[row[0]] = row[1] - - return d_script_checksum - - def fetch_change_history(self, change_history_table): - query = self._q_ch_fetch.format(**change_history_table) - results = self.execute_snowflake_query(query) - - # Collect all the results into a list - change_history = list() - for cursor in results: - for row in cursor: - change_history.append(row[0]) - - return change_history - - def reset_session(self): - # These items are optional, so we can only reset the ones with values - reset_query = "" - if self.conArgs["role"]: - reset_query += self._q_set_sess_role.format(**self.conArgs) + " " - if self.conArgs["warehouse"]: - reset_query += self._q_set_sess_warehouse.format(**self.conArgs) + " " - if self.conArgs["database"]: - reset_query += self._q_set_sess_database.format(**self.conArgs) + " " - if self.conArgs["schema"]: - reset_query += self._q_set_sess_schema.format(**self.conArgs) + " " - self.execute_snowflake_query(reset_query) - - def reset_query_tag(self, extra_tag=None): - query_tag = self.conArgs["session_parameters"]["QUERY_TAG"] - if extra_tag: - query_tag += f";{extra_tag}" - - self.execute_snowflake_query(self._q_sess_tag.format(query_tag=query_tag)) - - def apply_change_script(self, script, script_content, change_history_table): - # Define a few other change related variables - checksum = hashlib.sha224(script_content.encode("utf-8")).hexdigest() - execution_time = 0 - status = "Success" - - # Execute the contents of the script - if len(script_content) > 0: - start = time.time() - self.reset_session() - self.reset_query_tag(script["script_name"]) - self.execute_snowflake_query(script_content) - self.reset_query_tag() - self.reset_session() - end = time.time() - execution_time = round(end - start) - - # Finally record this change in the change history table by gathering data - frmt_args = script.copy() - frmt_args.update(change_history_table) - frmt_args["checksum"] = checksum - frmt_args["execution_time"] = execution_time - frmt_args["status"] = status - frmt_args["user"] = self.conArgs["user"] - # Compose and execute the insert statement to the log file - query = self._q_ch_log.format(**frmt_args) - self.execute_snowflake_query(query) - - -def deploy_command(config): - # Make sure we have the required connection info, all of the below needs to be present. - req_args = { - "snowflake_account", - "snowflake_user", - "snowflake_role", - "snowflake_warehouse", - } - provided_args = {k: v for (k, v) in config.items() if v} - missing_args = req_args - provided_args.keys() - if len(missing_args) > 0: - raise ValueError( - _err_args_missing % ", ".join({s.replace("_", " ") for s in missing_args}) - ) - - # ensure an authentication method is specified / present. one of the below needs to be present. 
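The guard below requires at least one authentication-related environment variable to be present before a deploy proceeds. A standalone sketch of the same presence check, reusing the four variable names from the set that follows (the helper name is hypothetical):

```python
import os

AUTH_ENV_VARS = {
    "SNOWFLAKE_PASSWORD",
    "SNOWSQL_PWD",
    "SNOWFLAKE_PRIVATE_KEY_PATH",
    "SNOWFLAKE_AUTHENTICATOR",
}


def has_auth_method() -> bool:
    # Same logic as `len(req_env_var - dict(os.environ).keys()) == len(req_env_var)`,
    # inverted: succeed as soon as any one of the variables is defined.
    return any(name in os.environ for name in AUTH_ENV_VARS)
```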
- req_env_var = { - "SNOWFLAKE_PASSWORD", - "SNOWSQL_PWD", - "SNOWFLAKE_PRIVATE_KEY_PATH", - "SNOWFLAKE_AUTHENTICATOR", - } - if len(req_env_var - dict(os.environ).keys()) == len(req_env_var): - raise ValueError(_err_env_missing) - - # Log some additional details - if config["dry_run"]: - print("Running in dry-run mode") - print(_log_config_details.format(**config)) - - # connect to snowflake and maintain connection - session = SnowflakeSchemachangeSession(config) - - scripts_skipped = 0 - scripts_applied = 0 - - # Deal with the change history table (create if specified) - change_history_table = get_change_history_table_details( - config["change_history_table"] - ) - change_history_metadata = session.fetch_change_history_metadata( - change_history_table - ) - if change_history_metadata: - print( - _log_ch_use.format( - last_altered=change_history_metadata["last_altered"], - **change_history_table, - ) - ) - elif config["create_change_history_table"]: - # Create the change history table (and containing objects) if it don't exist. - if not config["dry_run"]: - session.create_change_history_table_if_missing(change_history_table) - print(_log_ch_create.format(**change_history_table)) - else: - raise ValueError(_err_ch_missing.format(**change_history_table)) - - # Find the max published version - max_published_version = "" - - change_history = None - r_scripts_checksum = None - if (config["dry_run"] and change_history_metadata) or not config["dry_run"]: - change_history = session.fetch_change_history(change_history_table) - r_scripts_checksum = session.fetch_r_scripts_checksum(change_history_table) - - if change_history: - max_published_version = change_history[0] - max_published_version_display = max_published_version - if max_published_version_display == "": - max_published_version_display = "None" - print( - _log_ch_max_version.format( - max_published_version_display=max_published_version_display - ) - ) - - # Find all scripts in the root folder (recursively) and sort them correctly - all_scripts = get_all_scripts_recursively(config["root_folder"], config["verbose"]) - all_script_names = list(all_scripts.keys()) - # Sort scripts such that versioned scripts get applied first and then the repeatable ones. - all_script_names_sorted = ( - sorted_alphanumeric([script for script in all_script_names if script[0] == "V"]) - + sorted_alphanumeric( - [script for script in all_script_names if script[0] == "R"] - ) - + sorted_alphanumeric( - [script for script in all_script_names if script[0] == "A"] - ) - ) - - checksum_current = None - checksum_last = None - # Loop through each script in order and apply any required changes - for script_name in all_script_names_sorted: - script = all_scripts[script_name] - - # Apply a versioned-change script only if the version is newer than the most recent change in the database - # Apply any other scripts, i.e. 
repeatable scripts, irrespective of the most recent change in the database - if script_name[0] == "V" and get_alphanum_key( - script["script_version"] - ) <= get_alphanum_key(max_published_version): - if config["verbose"]: - print( - _log_skip_v.format( - max_published_version=max_published_version, **script - ) - ) - scripts_skipped += 1 - continue - - # Always process with jinja engine - jinja_processor = JinjaTemplateProcessor( - project_root=config["root_folder"], modules_folder=config["modules_folder"] - ) - content = jinja_processor.render( - jinja_processor.relpath(script["script_full_path"]), - config["vars"], - config["verbose"], - ) - - # Apply only R scripts where the checksum changed compared to the last execution of snowchange - if script_name[0] == "R": - # Compute the checksum for the script - checksum_current = hashlib.sha224(content.encode("utf-8")).hexdigest() - - # check if R file was already executed - if r_scripts_checksum and (script_name in r_scripts_checksum): - checksum_last = r_scripts_checksum[script_name] - else: - checksum_last = "" - - # check if there is a change of the checksum in the script - if checksum_current == checksum_last: - if config["verbose"]: - print(_log_skip_r.format(**script)) - scripts_skipped += 1 - continue - - print(_log_apply.format(**script)) - if not config["dry_run"]: - session.apply_change_script(script, content, change_history_table) - - scripts_applied += 1 - - print( - _log_apply_set_complete.format( - scripts_applied=scripts_applied, scripts_skipped=scripts_skipped - ) - ) +# region Global Variables +# metadata +SCHEMACHANGE_VERSION = "3.7.0" +SNOWFLAKE_APPLICATION_NAME = "schemachange" +module_logger = structlog.getLogger(__name__) -def render_command(config, script_path): +def render(config: RenderConfig, script_path: Path, logger: BoundLogger) -> None: """ Renders the provided script. Note: does not apply secrets filtering. """ - # Validate the script file path - script_path = os.path.abspath(script_path) - if not os.path.isfile(script_path): - raise ValueError( - _err_invalid_folder.format(folder_type="script_path", path=script_path) - ) # Always process with jinja engine jinja_processor = JinjaTemplateProcessor( - project_root=config["root_folder"], modules_folder=config["modules_folder"] + project_root=config.root_folder, modules_folder=config.modules_folder ) content = jinja_processor.render( - jinja_processor.relpath(script_path), config["vars"], config["verbose"] + jinja_processor.relpath(script_path), config.config_vars ) checksum = hashlib.sha224(content.encode("utf-8")).hexdigest() - print("Checksum %s" % checksum) - print(content) - + logger.info("Success", checksum=checksum, content=content) -def alphanum_convert(text: str): - result = None - if text.isdigit(): - result = int(text) - else: - result = text.lower() - return result - -# This function will return a list containing the parts of the key (split by number parts) -# Each number is converted to and integer and string parts are left as strings -# This will enable correct sorting in python when the lists are compared -# e.g. 
get_alphanum_key('1.2.2') results in ['', 1, '.', 2, '.', 2, ''] -def get_alphanum_key(key): - alphanum_key = [alphanum_convert(c) for c in re.split("([0-9]+)", key)] - return alphanum_key - - -def sorted_alphanumeric(data): - return sorted(data, key=get_alphanum_key) - - -def load_schemachange_config(config_file_path: str) -> Dict[str, Any]: - """ - Loads the schemachange config file and processes with jinja templating engine - """ - config = dict() - - # First read in the yaml config file, if present - if os.path.isfile(config_file_path): - with open(config_file_path) as config_file: - # Run the config file through the jinja engine to give access to environmental variables - # The config file does not have the same access to the jinja functionality that a script - # has. - config_template = jinja2.Template( - config_file.read(), - undefined=jinja2.StrictUndefined, - extensions=[JinjaEnvVar], - ) - - # The FullLoader parameter handles the conversion from YAML scalar values to Python the dictionary format - config = yaml.load(config_template.render(), Loader=yaml.FullLoader) - print("Using config file: %s" % config_file_path) - return config - - -def get_schemachange_config( - config_file_path, - root_folder, - modules_folder, - snowflake_account, - snowflake_user, - snowflake_role, - snowflake_warehouse, - snowflake_database, - snowflake_schema, - change_history_table, - vars, - create_change_history_table, - autocommit, - verbose, - dry_run, - query_tag, - oauth_config, - **kwargs, -): - # create cli override dictionary - # Could refactor to just pass Args as a dictionary? - # **kwargs inlcuded to avoid complaints about unexpect arguments from arg parser eg:subcommand - cli_inputs = { - "root_folder": root_folder, - "modules_folder": modules_folder, - "snowflake_account": snowflake_account, - "snowflake_user": snowflake_user, - "snowflake_role": snowflake_role, - "snowflake_warehouse": snowflake_warehouse, - "snowflake_database": snowflake_database, - "snowflake_schema": snowflake_schema, - "change_history_table": change_history_table, - "vars": vars, - "create_change_history_table": create_change_history_table, - "autocommit": autocommit, - "verbose": verbose, - "dry_run": dry_run, - "query_tag": query_tag, - "oauth_config": oauth_config, - } - cli_inputs = {k: v for (k, v) in cli_inputs.items() if v} - - # load YAML inputs and convert kebabs to snakes - config = { - k.replace("-", "_"): v - for (k, v) in load_schemachange_config(config_file_path).items() - } - # set values passed into the cli Overriding values in config file - config.update(cli_inputs) - - # create Default values dictionary - config_defaults = { - "root_folder": os.path.abspath("."), - "modules_folder": None, - "snowflake_account": None, - "snowflake_user": None, - "snowflake_role": None, - "snowflake_warehouse": None, - "snowflake_database": None, - "snowflake_schema": None, - "change_history_table": None, - "vars": {}, - "create_change_history_table": False, - "autocommit": False, - "verbose": False, - "dry_run": False, - "query_tag": None, - "oauth_config": None, - } - # insert defualt values for items not populated - config.update( - {k: v for (k, v) in config_defaults.items() if k not in config.keys()} +def main(): + module_logger.info( + "schemachange version: %(schemachange_version)s" + % {"schemachange_version": SCHEMACHANGE_VERSION} ) - # Validate folder paths - if "root_folder" in config: - config["root_folder"] = os.path.abspath(config["root_folder"]) - if not os.path.isdir(config["root_folder"]): - raise 
ValueError( - _err_invalid_folder.format(folder_type="root", path=config["root_folder"]) - ) - - if config["modules_folder"]: - config["modules_folder"] = os.path.abspath(config["modules_folder"]) - if not os.path.isdir(config["modules_folder"]): - raise ValueError( - _err_invalid_folder.format( - folder_type="modules", path=config["modules_folder"] - ) - ) - if config["vars"]: - # if vars is configured wrong in the config file it will come through as a string - if not isinstance(config["vars"], dict): - raise ValueError(_err_vars_config) - - # the variable schema change has been reserved - if "schemachange" in config["vars"]: - raise ValueError(_err_vars_reserved) - - return config - - -def get_snowflake_identifier_string(input_value: str, input_type: str) -> str: - pattern = re.compile( - r"^[\w]+$" - ) # Words with alphanumeric characters and underscores only. - result = "" - - if input_value is None: - result = None - elif pattern.match(input_value): - result = input_value - elif input_value.startswith('"') and input_value.endswith('"'): - result = input_value - elif input_value.startswith('"') and not input_value.endswith('"'): - raise ValueError( - f"Invalid {input_type}: {input_value}. Missing ending double quote" - ) - elif not input_value.startswith('"') and input_value.endswith('"'): - raise ValueError( - f"Invalid {input_type}: {input_value}. Missing beginning double quote" - ) - else: - result = f'"{input_value}"' - - return result - - -def get_all_scripts_recursively(root_directory, verbose): - all_files = dict() - all_versions = list() - # Walk the entire directory structure recursively - for directory_path, directory_names, file_names in os.walk(root_directory): - for file_name in file_names: - file_full_path = os.path.join(directory_path, file_name) - script_name_parts = re.search( - r"^([V])(.+?)__(.+?)\.(?:sql|sql.jinja)$", - file_name.strip(), - re.IGNORECASE, - ) - repeatable_script_name_parts = re.search( - r"^([R])__(.+?)\.(?:sql|sql.jinja)$", file_name.strip(), re.IGNORECASE - ) - always_script_name_parts = re.search( - r"^([A])__(.+?)\.(?:sql|sql.jinja)$", file_name.strip(), re.IGNORECASE - ) - - # Set script type depending on whether it matches the versioned file naming format - if script_name_parts is not None: - script_type = "V" - if verbose: - print("Found Versioned file " + file_full_path) - elif repeatable_script_name_parts is not None: - script_type = "R" - if verbose: - print("Found Repeatable file " + file_full_path) - elif always_script_name_parts is not None: - script_type = "A" - if verbose: - print("Found Always file " + file_full_path) - else: - if verbose: - print("Ignoring non-change file " + file_full_path) - continue - - # script name is the filename without any jinja extension - (file_part, extension_part) = os.path.splitext(file_name) - if extension_part.upper() == ".JINJA": - script_name = file_part - else: - script_name = file_name - - # Add this script to our dictionary (as nested dictionary) - script = dict() - script["script_name"] = script_name - script["script_full_path"] = file_full_path - script["script_type"] = script_type - script["script_version"] = ( - "" if script_type in ["R", "A"] else script_name_parts.group(2) - ) - if script_type == "R": - script["script_description"] = ( - repeatable_script_name_parts.group(2).replace("_", " ").capitalize() - ) - elif script_type == "A": - script["script_description"] = ( - always_script_name_parts.group(2).replace("_", " ").capitalize() - ) - else: - script["script_description"] = ( - 
script_name_parts.group(3).replace("_", " ").capitalize() - ) - - # Throw an error if the script_name already exists - if script_name in all_files: - raise ValueError( - _err_dup_scripts.format( - first_path=all_files[script_name]["script_full_path"], **script - ) - ) - - all_files[script_name] = script + config = get_merged_config() + redact_config_secrets(config_secrets=config.secrets) - # Throw an error if the same version exists more than once - if script_type == "V": - if script["script_version"] in all_versions: - raise ValueError(_err_dup_scripts_version.format(**script)) - all_versions.append(script["script_version"]) - - return all_files - - -def get_change_history_table_details(change_history_table): - # Start with the global defaults - details = dict() - details["database_name"] = _metadata_database_name - details["schema_name"] = _metadata_schema_name - details["table_name"] = _metadata_table_name - - # Then override the defaults if requested. The name could be in one, two or three part notation. - if change_history_table is not None: - table_name_parts = change_history_table.strip().split(".") - if len(table_name_parts) == 1: - details["table_name"] = table_name_parts[0] - elif len(table_name_parts) == 2: - details["table_name"] = table_name_parts[1] - details["schema_name"] = table_name_parts[0] - elif len(table_name_parts) == 3: - details["table_name"] = table_name_parts[2] - details["schema_name"] = table_name_parts[1] - details["database_name"] = table_name_parts[0] - else: - raise ValueError(_err_invalid_cht % change_history_table) - # if the object name does not include '"' raise to upper case on return - return {k: v if '"' in v else v.upper() for (k, v) in details.items()} - - -def extract_config_secrets(config: Dict[str, Any]) -> Set[str]: - """ - Extracts all secret values from the vars attributes in config - """ - - # defined as an inner/ nested function to provide encapsulation - def inner_extract_dictionary_secrets( - dictionary: Dict[str, Any], child_of_secrets: bool = False - ) -> Set[str]: - """ - Considers any key with the word secret in the name as a secret or - all values as secrets if a child of a key named secrets. - """ - extracted_secrets: Set[str] = set() - - if dictionary: - for key, value in dictionary.items(): - if isinstance(value, dict): - if key == "secrets": - extracted_secrets = ( - extracted_secrets - | inner_extract_dictionary_secrets(value, True) - ) - else: - extracted_secrets = ( - extracted_secrets - | inner_extract_dictionary_secrets(value, child_of_secrets) - ) - elif child_of_secrets or "SECRET" in key.upper(): - extracted_secrets.add(value.strip()) - return extracted_secrets - - extracted = set() - - if config: - if "vars" in config: - extracted = inner_extract_dictionary_secrets(config["vars"]) - return extracted - - -def main(argv=sys.argv): - parser = argparse.ArgumentParser( - prog="schemachange", - description="""Apply schema changes to a Snowflake account. 
- Full readme at https://github.com/Snowflake-Labs/schemachange""", - formatter_class=argparse.RawTextHelpFormatter, - ) - subcommands = parser.add_subparsers(dest="subcommand") - - parser_deploy = subcommands.add_parser("deploy") - parser_deploy.add_argument( - "--config-folder", - type=str, - default=".", - help="The folder to look in for the schemachange-config.yml file (the default is the current working directory)", - required=False, - ) - parser_deploy.add_argument( - "-f", - "--root-folder", - type=str, - help="The root folder for the database change scripts", - required=False, - ) - parser_deploy.add_argument( - "-m", - "--modules-folder", - type=str, - help="The modules folder for jinja macros and templates to be used across multiple scripts", - required=False, - ) - parser_deploy.add_argument( - "-a", - "--snowflake-account", - type=str, - help="The name of the snowflake account (e.g. xy12345.east-us-2.azure)", - required=False, - ) - parser_deploy.add_argument( - "-u", - "--snowflake-user", - type=str, - help="The name of the snowflake user", - required=False, - ) - parser_deploy.add_argument( - "-r", - "--snowflake-role", - type=str, - help="The name of the default role to use", - required=False, - ) - parser_deploy.add_argument( - "-w", - "--snowflake-warehouse", - type=str, - help="The name of the default warehouse to use. Can be overridden in the change scripts.", - required=False, - ) - parser_deploy.add_argument( - "-d", - "--snowflake-database", - type=str, - help="The name of the default database to use. Can be overridden in the change scripts.", - required=False, - ) - parser_deploy.add_argument( - "-s", - "--snowflake-schema", - type=str, - help="The name of the default schema to use. Can be overridden in the change scripts.", - required=False, - ) - parser_deploy.add_argument( - "-c", - "--change-history-table", - type=str, - help="""Used to override the default name of the change history table - (the default is METADATA.SCHEMACHANGE.CHANGE_HISTORY)""", - required=False, + structlog.configure( + wrapper_class=structlog.make_filtering_bound_logger(config.log_level), ) - parser_deploy.add_argument( - "--vars", - type=json.loads, - help="""Define values for the variables to be replaced in change scripts, given in JSON format - (e.g. {"variable1": "value1", "variable2": "value2"})""", - required=False, - ) - parser_deploy.add_argument( - "--create-change-history-table", - action="store_true", - help="Create the change history schema and table, if they do not exist (the default is False)", - required=False, - ) - parser_deploy.add_argument( - "-ac", - "--autocommit", - action="store_true", - help="Enable autocommit feature for DML commands (the default is False)", - required=False, - ) - parser_deploy.add_argument( - "-v", - "--verbose", - action="store_true", - help="Display verbose debugging details during execution (the default is False)", - required=False, - ) - parser_deploy.add_argument( - "--dry-run", - action="store_true", - help="Run schemachange in dry run mode (the default is False)", - required=False, - ) - parser_deploy.add_argument( - "--query-tag", - type=str, - help="The string to add to the Snowflake QUERY_TAG session value for each query executed", - required=False, - ) - parser_deploy.add_argument( - "--oauth-config", - type=json.loads, - help="""Define values for the variables to Make Oauth Token requests - (e.g. {"token-provider-url": "https//...", "token-request-payload": {"client_id": "GUID_xyz",...},... 
})""", - required=False, - ) - # TODO test CLI passing of args - parser_render = subcommands.add_parser( - "render", - description="Renders a script to the console, used to check and verify jinja output from scripts.", - ) - parser_render.add_argument( - "--config-folder", - type=str, - default=".", - help="The folder to look in for the schemachange-config.yml file (the default is the current working directory)", - required=False, - ) - parser_render.add_argument( - "-f", - "--root-folder", - type=str, - help="The root folder for the database change scripts", - required=False, - ) - parser_render.add_argument( - "-m", - "--modules-folder", - type=str, - help="The modules folder for jinja macros and templates to be used across multiple scripts", - required=False, - ) - parser_render.add_argument( - "--vars", - type=json.loads, - help="""Define values for the variables to be replaced in change scripts, given in JSON format - (e.g. {"variable1": "value1", "variable2": "value2"})""", - required=False, - ) - parser_render.add_argument( - "-v", - "--verbose", - action="store_true", - help="Display verbose debugging details during execution (the default is False)", - required=False, - ) - parser_render.add_argument("script", type=str, help="The script to render") + logger = structlog.getLogger() + logger = logger.bind(schemachange_version=SCHEMACHANGE_VERSION) - # The original parameters did not support subcommands. Check if a subcommand has been supplied - # if not default to deploy to match original behaviour. - args = argv[1:] - if len(args) == 0 or not any( - subcommand in args[0].upper() for subcommand in ["DEPLOY", "RENDER"] - ): - args = ["deploy"] + args - - args = parser.parse_args(args) - - print("schemachange version: %s" % _schemachange_version) - - # First get the config values - config_file_path = os.path.join(args.config_folder, _config_file_name) - - # Retreive argparser attributes as dictionary - schemachange_args = args.__dict__ - schemachange_args["config_file_path"] = config_file_path - - # nullify expected null values for render. 
- if args.subcommand == "render": - renderoveride = { - "snowflake_account": None, - "snowflake_user": None, - "snowflake_role": None, - "snowflake_warehouse": None, - "snowflake_database": None, - "change_history_table": None, - "snowflake_schema": None, - "create_change_history_table": None, - "autocommit": None, - "dry_run": None, - "query_tag": None, - "oauth_config": None, - } - schemachange_args.update(renderoveride) - config = get_schemachange_config(**schemachange_args) - - # setup a secret manager and assign to global scope - sm = SecretManager() - SecretManager.set_global_manager(sm) - # Extract all secrets for --vars - sm.add_range(extract_config_secrets(config)) - - # Then log some details - print("Using root folder %s" % config["root_folder"]) - if config["modules_folder"]: - print("Using Jinja modules folder %s" % config["modules_folder"]) - - # pretty print the variables in yaml style - if config["vars"] == {}: - print("Using variables: {}") - else: - print("Using variables:") - print( - textwrap.indent( - SecretManager.global_redact( - yaml.dump(config["vars"], sort_keys=False, default_flow_style=False) - ), - prefix=" ", - ) - ) + config.log_details() # Finally, execute the command - if args.subcommand == "render": - render_command(config, args.script) + if config.subcommand == "render": + render( + config=config, + script_path=config.script_path, + logger=logger, + ) else: - deploy_command(config) + config.check_for_deploy_args() + session = get_session_from_config( + config=config, + schemachange_version=SCHEMACHANGE_VERSION, + snowflake_application_name=SNOWFLAKE_APPLICATION_NAME, + logger=logger, + ) + deploy(config=config, session=session) if __name__ == "__main__": diff --git a/schemachange/config/BaseConfig.py b/schemachange/config/BaseConfig.py new file mode 100644 index 00000000..f07df6d1 --- /dev/null +++ b/schemachange/config/BaseConfig.py @@ -0,0 +1,70 @@ +from __future__ import annotations + +import dataclasses +import logging +from abc import ABC +from pathlib import Path +from typing import Literal, ClassVar, TypeVar + +import structlog + +from schemachange.config.utils import ( + validate_directory, + validate_config_vars, + get_config_secrets, +) + +logger = structlog.getLogger(__name__) +T = TypeVar("T", bound="BaseConfig") + + +@dataclasses.dataclass(frozen=True, kw_only=True) +class BaseConfig(ABC): + default_config_file_name: ClassVar[str] = "schemachange-config.yml" + + subcommand: Literal["deploy", "render"] + config_version: int | None = None + config_file_path: Path | None = None + root_folder: Path | None = Path(".") + modules_folder: Path | None = None + config_vars: dict = dataclasses.field(default_factory=dict) + secrets: set[str] = dataclasses.field(default_factory=set) + log_level: int = logging.INFO + + @classmethod + def factory( + cls, + subcommand: Literal["deploy", "render"], + config_file_path: Path, + root_folder: Path | str | None = Path("."), + modules_folder: Path | str | None = None, + config_vars: str | dict | None = None, + log_level: int = logging.INFO, + **kwargs, + ): + try: + secrets = get_config_secrets(config_vars) + except Exception as e: + raise Exception( + "config_vars did not parse correctly, please check its configuration" + ) from e + + return cls( + subcommand=subcommand, + config_file_path=config_file_path, + root_folder=validate_directory(path=root_folder), + modules_folder=validate_directory(path=modules_folder), + config_vars=validate_config_vars(config_vars=config_vars), + secrets=secrets, + 
log_level=log_level, + **kwargs, + ) + + def log_details(self): + logger.info("Using root folder", root_folder=str(self.root_folder)) + if self.modules_folder: + logger.info( + "Using Jinja modules folder", modules_folder=str(self.modules_folder) + ) + + logger.info("Using variables", vars=self.config_vars) diff --git a/schemachange/config/ChangeHistoryTable.py b/schemachange/config/ChangeHistoryTable.py new file mode 100644 index 00000000..ba7645a5 --- /dev/null +++ b/schemachange/config/ChangeHistoryTable.py @@ -0,0 +1,51 @@ +import dataclasses +from typing import ClassVar + +from schemachange.config.utils import get_snowflake_identifier_string + + +@dataclasses.dataclass(frozen=True) +class ChangeHistoryTable: + _default_database_name: ClassVar[str] = "METADATA" + _default_schema_name: ClassVar[str] = "SCHEMACHANGE" + _default_table_name: ClassVar[str] = "CHANGE_HISTORY" + + table_name: str = "CHANGE_HISTORY" + schema_name: str = "SCHEMACHANGE" + database_name: str = "METADATA" + + @property + def fully_qualified(self) -> str: + return f"{self.database_name}.{self.schema_name}.{self.table_name}" + + @classmethod + def from_str(cls, table_str: str): + database_name = cls._default_database_name + schema_name = cls._default_schema_name + table_name = cls._default_table_name + + if table_str is not None: + table_name_parts = table_str.strip().split(".") + if len(table_name_parts) == 1: + table_name = table_name_parts[0] + elif len(table_name_parts) == 2: + table_name = table_name_parts[1] + schema_name = table_name_parts[0] + elif len(table_name_parts) == 3: + table_name = table_name_parts[2] + schema_name = table_name_parts[1] + database_name = table_name_parts[0] + else: + raise ValueError(f"Invalid change history table name: {table_str}") + + return cls( + table_name=get_snowflake_identifier_string( + input_value=table_name, input_type="table_name" + ), + schema_name=get_snowflake_identifier_string( + input_value=schema_name, input_type="schema_name" + ), + database_name=get_snowflake_identifier_string( + input_value=database_name, input_type="database_name" + ), + ) diff --git a/schemachange/config/DeployConfig.py b/schemachange/config/DeployConfig.py new file mode 100644 index 00000000..c0bcc2ee --- /dev/null +++ b/schemachange/config/DeployConfig.py @@ -0,0 +1,85 @@ +from __future__ import annotations + +import dataclasses +from pathlib import Path +from typing import Literal + +from schemachange.config.BaseConfig import BaseConfig +from schemachange.config.ChangeHistoryTable import ChangeHistoryTable +from schemachange.config.utils import get_snowflake_identifier_string + + +@dataclasses.dataclass(frozen=True, kw_only=True) +class DeployConfig(BaseConfig): + subcommand: Literal["deploy"] = "deploy" + snowflake_account: str | None = None + snowflake_user: str | None = None + snowflake_role: str | None = None + snowflake_warehouse: str | None = None + snowflake_database: str | None = None + snowflake_schema: str | None = None + # TODO: Turn change_history_table into three arguments. 
There's no need to parse it from a string + change_history_table: ChangeHistoryTable | None = dataclasses.field( + default_factory=ChangeHistoryTable + ) + create_change_history_table: bool = False + autocommit: bool = False + dry_run: bool = False + query_tag: str | None = None + oauth_config: dict | None = None + + @classmethod + def factory( + cls, + config_file_path: Path, + snowflake_role: str | None = None, + snowflake_warehouse: str | None = None, + snowflake_database: str | None = None, + snowflake_schema: str | None = None, + change_history_table: str | None = None, + **kwargs, + ): + if "subcommand" in kwargs: + kwargs.pop("subcommand") + + change_history_table = ChangeHistoryTable.from_str( + table_str=change_history_table + ) + + return super().factory( + subcommand="deploy", + config_file_path=config_file_path, + snowflake_role=get_snowflake_identifier_string( + snowflake_role, "snowflake_role" + ), + snowflake_warehouse=get_snowflake_identifier_string( + snowflake_warehouse, "snowflake_warehouse" + ), + snowflake_database=get_snowflake_identifier_string( + snowflake_database, "snowflake_database" + ), + snowflake_schema=get_snowflake_identifier_string( + snowflake_schema, "snowflake_schema" + ), + change_history_table=change_history_table, + **kwargs, + ) + + def check_for_deploy_args(self) -> None: + """Make sure we have the required connection info""" + + req_args = { + "snowflake_account": self.snowflake_account, + "snowflake_user": self.snowflake_user, + "snowflake_role": self.snowflake_role, + "snowflake_warehouse": self.snowflake_warehouse, + } + missing_args = [key for key, value in req_args.items() if value is None] + + if len(missing_args) == 0: + return + + missing_args = ", ".join({arg.replace("_", " ") for arg in missing_args}) + raise ValueError( + f"Missing config values. 
The following config values are required: {missing_args}" + ) diff --git a/schemachange/config/RenderConfig.py b/schemachange/config/RenderConfig.py new file mode 100644 index 00000000..f17946af --- /dev/null +++ b/schemachange/config/RenderConfig.py @@ -0,0 +1,33 @@ +from __future__ import annotations + +import dataclasses +from pathlib import Path +from typing import Literal + +from schemachange.config.BaseConfig import BaseConfig +from schemachange.config.utils import validate_file_path + + +@dataclasses.dataclass(frozen=True, kw_only=True) +class RenderConfig(BaseConfig): + subcommand: Literal["render"] = "render" + script_path: Path + + @classmethod + def factory( + cls, + script_path: Path | str, + **kwargs, + ): + # Ignore Deploy arguments + field_names = [field.name for field in dataclasses.fields(RenderConfig)] + kwargs = {k: v for k, v in kwargs.items() if k in field_names} + + if "subcommand" in kwargs: + kwargs.pop("subcommand") + + return super().factory( + subcommand="render", + script_path=validate_file_path(file_path=script_path), + **kwargs, + ) diff --git a/schemachange/config/__init__.py b/schemachange/config/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/schemachange/config/get_merged_config.py b/schemachange/config/get_merged_config.py new file mode 100644 index 00000000..607c09e9 --- /dev/null +++ b/schemachange/config/get_merged_config.py @@ -0,0 +1,71 @@ +import logging +import sys +from pathlib import Path +from typing import Union, Optional + +from schemachange.config.DeployConfig import DeployConfig +from schemachange.config.RenderConfig import RenderConfig +from schemachange.config.parse_cli_args import parse_cli_args +from schemachange.config.utils import load_yaml_config, validate_directory + + +def get_yaml_config_kwargs(config_file_path: Optional[Path]) -> dict: + # TODO: I think the configuration key for oauthconfig should be oauth-config. 
+ # This looks like a bug in the current state of the repo to me + + # load YAML inputs and convert kebabs to snakes + kwargs = { + k.replace("-", "_").replace("oauthconfig", "oauth_config"): v + for (k, v) in load_yaml_config(config_file_path).items() + } + + if "verbose" in kwargs: + kwargs["log_level"] = logging.DEBUG + kwargs.pop("verbose") + + if "vars" in kwargs: + kwargs["config_vars"] = kwargs.pop("vars") + + return kwargs + + +def get_merged_config() -> Union[DeployConfig, RenderConfig]: + cli_kwargs = parse_cli_args(sys.argv[1:]) + + if "verbose" in cli_kwargs and cli_kwargs["verbose"]: + cli_kwargs["log_level"] = logging.DEBUG + cli_kwargs.pop("verbose") + + cli_config_vars = cli_kwargs.pop("config_vars", None) + if cli_config_vars is None: + cli_config_vars = {} + + config_folder = validate_directory(path=cli_kwargs.pop("config_folder", ".")) + config_file_path = Path(config_folder) / "schemachange-config.yml" + + yaml_kwargs = get_yaml_config_kwargs( + config_file_path=config_file_path, + ) + yaml_config_vars = yaml_kwargs.pop("config_vars", None) + if yaml_config_vars is None: + yaml_config_vars = {} + + config_vars = { + **yaml_config_vars, + **cli_config_vars, + } + + # override the YAML config with the CLI configuration + kwargs = { + "config_file_path": config_file_path, + "config_vars": config_vars, + **{k: v for k, v in yaml_kwargs.items() if v is not None}, + **{k: v for k, v in cli_kwargs.items() if v is not None}, + } + + if cli_kwargs["subcommand"] == "deploy": + return DeployConfig.factory(**kwargs) + elif cli_kwargs["subcommand"] == "render": + return RenderConfig.factory(**kwargs) + else: + raise Exception(f"unhandled subcommand: {cli_kwargs['subcommand'] }") diff --git a/schemachange/config/parse_cli_args.py b/schemachange/config/parse_cli_args.py new file mode 100644 index 00000000..f287cd5f --- /dev/null +++ b/schemachange/config/parse_cli_args.py @@ -0,0 +1,210 @@ +from __future__ import annotations + +import argparse +import json +from enum import Enum + +import structlog + +logger = structlog.getLogger(__name__) + + +class EnumAction(argparse.Action): + """ + Argparse action for handling Enums + + Thanks to Tim! + https://stackoverflow.com/a/60750535 + """ + + def __init__(self, **kwargs): + # Pop off the type value + enum_type = kwargs.pop("type", None) + + # Ensure an Enum subclass is provided + if enum_type is None: + raise ValueError("type must be assigned an Enum when using EnumAction") + # noinspection PyTypeChecker + if not issubclass(enum_type, Enum): + raise TypeError("type must be an Enum when using EnumAction") + + # Generate choices from the Enum + kwargs.setdefault("choices", tuple(e.name for e in enum_type)) + + super().__init__(**kwargs) + + self._enum = enum_type + + def __call__(self, parser, namespace, values, option_string=None): + # Convert value back into an Enum + value = self._enum[values] + setattr(namespace, self.dest, value) + + +def parse_cli_args(args) -> dict: + parser = argparse.ArgumentParser( + prog="schemachange", + description="Apply schema changes to a Snowflake account. 
Full readme at "
+        "https://github.com/Snowflake-Labs/schemachange",
+        formatter_class=argparse.RawTextHelpFormatter,
+    )
+
+    parent_parser = argparse.ArgumentParser(add_help=False)
+    parent_parser.add_argument(
+        "--config-folder",
+        type=str,
+        default=".",
+        help="The folder to look in for the schemachange-config.yml file "
+        "(the default is the current working directory)",
+        required=False,
+    )
+    parent_parser.add_argument(
+        "-f",
+        "--root-folder",
+        type=str,
+        help="The root folder for the database change scripts",
+        required=False,
+    )
+    parent_parser.add_argument(
+        "-m",
+        "--modules-folder",
+        type=str,
+        help="The modules folder for jinja macros and templates to be used across multiple scripts",
+        required=False,
+    )
+    parent_parser.add_argument(
+        "--vars",
+        type=json.loads,
+        help='Define values for the variables to be replaced in change scripts, given in JSON format (e.g. {"variable1": '
+        '"value1", "variable2": "value2"})',
+        required=False,
+    )
+    parent_parser.add_argument(
+        "-v",
+        "--verbose",
+        action="store_const",
+        const=True,
+        default=None,
+        help="Display verbose debugging details during execution (the default is False)",
+        required=False,
+    )
+
+    subcommands = parser.add_subparsers(dest="subcommand")
+    parser_deploy = subcommands.add_parser("deploy", parents=[parent_parser])
+
+    parser_deploy.add_argument(
+        "-a",
+        "--snowflake-account",
+        type=str,
+        help="The name of the snowflake account (e.g. xy12345.east-us-2.azure)",
+        required=False,
+    )
+    parser_deploy.add_argument(
+        "-u",
+        "--snowflake-user",
+        type=str,
+        help="The name of the snowflake user",
+        required=False,
+    )
+    parser_deploy.add_argument(
+        "-r",
+        "--snowflake-role",
+        type=str,
+        help="The name of the default role to use",
+        required=False,
+    )
+    parser_deploy.add_argument(
+        "-w",
+        "--snowflake-warehouse",
+        type=str,
+        help="The name of the default warehouse to use. Can be overridden in the change scripts.",
+        required=False,
+    )
+    parser_deploy.add_argument(
+        "-d",
+        "--snowflake-database",
+        type=str,
+        help="The name of the default database to use. Can be overridden in the change scripts.",
+        required=False,
+    )
+    parser_deploy.add_argument(
+        "-s",
+        "--snowflake-schema",
+        type=str,
+        help="The name of the default schema to use. Can be overridden in the change scripts.",
+        required=False,
+    )
+    parser_deploy.add_argument(
+        "-c",
+        "--change-history-table",
+        type=str,
+        help="Used to override the default name of the change history table (the default is "
+        "METADATA.SCHEMACHANGE.CHANGE_HISTORY)",
+        required=False,
+    )
+    parser_deploy.add_argument(
+        "--create-change-history-table",
+        action="store_const",
+        const=True,
+        default=None,
+        help="Create the change history schema and table, if they do not exist (the default is False)",
+        required=False,
+    )
+    parser_deploy.add_argument(
+        "-ac",
+        "--autocommit",
+        action="store_const",
+        const=True,
+        default=None,
+        help="Enable autocommit feature for DML commands (the default is False)",
+        required=False,
+    )
+    parser_deploy.add_argument(
+        "--dry-run",
+        action="store_const",
+        const=True,
+        default=None,
+        help="Run schemachange in dry run mode (the default is False)",
+        required=False,
+    )
+    parser_deploy.add_argument(
+        "--query-tag",
+        type=str,
+        help="The string to add to the Snowflake QUERY_TAG session value for each query executed",
+        required=False,
+    )
+    parser_deploy.add_argument(
+        "--oauth-config",
+        type=json.loads,
+        help='Define values for the variables used to make OAuth token requests (e.g. 
{"token-provider-url": ' + '"https//...", "token-request-payload": {"client_id": "GUID_xyz",...},... })', + required=False, + ) + + parser_render = subcommands.add_parser( + "render", + description="Renders a script to the console, used to check and verify jinja output from scripts.", + parents=[parent_parser], + ) + parser_render.add_argument( + "script_path", type=str, help="Path to the script to render" + ) + + # The original parameters did not support subcommands. Check if a subcommand has been supplied + # if not default to deploy to match original behaviour. + if len(args) == 0 or not any( + subcommand in args[0].upper() for subcommand in ["DEPLOY", "RENDER"] + ): + args = ["deploy"] + args + + parsed_args = parser.parse_args(args) + + parsed_kwargs = parsed_args.__dict__ + + if "log_level" in parsed_kwargs and isinstance(parsed_kwargs["log_level"], Enum): + parsed_kwargs["log_level"] = parsed_kwargs["log_level"].value + + if "vars" in parsed_kwargs: + parsed_kwargs["config_vars"] = parsed_kwargs.pop("vars") + + return parsed_kwargs diff --git a/schemachange/config/utils.py b/schemachange/config/utils.py new file mode 100644 index 00000000..cd984951 --- /dev/null +++ b/schemachange/config/utils.py @@ -0,0 +1,132 @@ +from __future__ import annotations + +import re +from pathlib import Path +from typing import Any + +import jinja2 +import jinja2.ext +import structlog +import yaml + +from schemachange.JinjaEnvVar import JinjaEnvVar + +logger = structlog.getLogger(__name__) + +snowflake_identifier_pattern = re.compile(r"^[\w]+$") + + +def get_snowflake_identifier_string(input_value: str, input_type: str) -> str: + # Words with alphanumeric characters and underscores only. + result = "" + + if input_value is None: + result = None + elif snowflake_identifier_pattern.match(input_value): + result = input_value + elif input_value.startswith('"') and input_value.endswith('"'): + result = input_value + elif input_value.startswith('"') and not input_value.endswith('"'): + raise ValueError( + f"Invalid {input_type}: {input_value}. Missing ending double quote" + ) + elif not input_value.startswith('"') and input_value.endswith('"'): + raise ValueError( + f"Invalid {input_type}: {input_value}. Missing beginning double quote" + ) + else: + result = f'"{input_value}"' + + return result + + +def get_config_secrets(config_vars: dict[str, dict | str] | None) -> set[str]: + """Extracts all secret values from the vars attributes in config""" + + def inner_extract_dictionary_secrets( + dictionary: dict[str, dict | str] | None, + child_of_secrets: bool = False, + ) -> set[str]: + """Considers any key with the word secret in the name as a secret or + all values as secrets if a child of a key named secrets. 
+ + defined as an inner/ nested function to provide encapsulation + """ + extracted_secrets: set[str] = set() + + if not dictionary: + return extracted_secrets + + for key, value in dictionary.items(): + if isinstance(value, dict): + if key == "secrets": + child_of_secrets = True + extracted_secrets = ( + extracted_secrets + | inner_extract_dictionary_secrets(value, child_of_secrets) + ) + elif child_of_secrets or "SECRET" in key.upper(): + extracted_secrets.add(value.strip()) + + return extracted_secrets + + return inner_extract_dictionary_secrets(config_vars) + + +def validate_file_path(file_path: Path | str) -> Path: + if isinstance(file_path, str): + file_path = Path(file_path) + if not file_path.is_file(): + raise ValueError(f"invalid file path: {str(file_path)}") + return file_path + + +def validate_directory(path: Path | str | None) -> Path | None: + if path is None: + return path + if isinstance(path, str): + path = Path(path) + if not path.is_dir(): + raise ValueError(f"Path is not valid directory: {str(path)}") + return path + + +def validate_config_vars(config_vars: str | dict | None) -> dict: + if config_vars is None: + return {} + + if not isinstance(config_vars, dict): + raise ValueError( + f"config_vars did not parse correctly, please check its configuration: {config_vars}" + ) + + if "schemachange" in config_vars.keys(): + raise ValueError( + "The variable 'schemachange' has been reserved for use by schemachange, please use a different name" + ) + + return config_vars + + +def load_yaml_config(config_file_path: Path | None) -> dict[str, Any]: + """ + Loads the schemachange config file and processes with jinja templating engine + """ + config = dict() + + # First read in the yaml config file, if present + if config_file_path is not None and config_file_path.is_file(): + with config_file_path.open() as config_file: + # Run the config file through the jinja engine to give access to environmental variables + # The config file does not have the same access to the jinja functionality that a script + # has. + config_template = jinja2.Template( + config_file.read(), + undefined=jinja2.StrictUndefined, + extensions=[JinjaEnvVar], + ) + + # The FullLoader parameter handles the conversion from YAML scalar values to Python the dictionary format + config = yaml.load(config_template.render(), Loader=yaml.FullLoader) + logger.info("Using config file", config_file_path=str(config_file_path)) + return config diff --git a/schemachange/deploy.py b/schemachange/deploy.py new file mode 100644 index 00000000..f1c53f39 --- /dev/null +++ b/schemachange/deploy.py @@ -0,0 +1,153 @@ +from __future__ import annotations + +import hashlib +import re + +import structlog + +from schemachange.JinjaTemplateProcessor import JinjaTemplateProcessor +from schemachange.config.DeployConfig import DeployConfig +from schemachange.session.Script import get_all_scripts_recursively +from schemachange.session.SnowflakeSession import SnowflakeSession + +logger = structlog.getLogger(__name__) + + +def alphanum_convert(text: str): + if text.isdigit(): + return int(text) + return text.lower() + + +# This function will return a list containing the parts of the key (split by number parts) +# Each number is converted to and integer and string parts are left as strings +# This will enable correct sorting in python when the lists are compared +# e.g. 
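Putting the extraction rules together, a hedged example with made-up values: any key containing `secret` (case-insensitive) marks its value, and every value nested under a `secrets` key is collected, however deep.

```python
from schemachange.config.utils import get_config_secrets

found = get_config_secrets(
    {"api_secret": "abc", "secrets": {"pwd": "xyz", "nested": {"k": "123"}}}
)
assert found == {"abc", "xyz", "123"}
```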
get_alphanum_key('1.2.2') results in ['', 1, '.', 2, '.', 2, ''] +def get_alphanum_key(key): + alphanum_key = [alphanum_convert(c) for c in re.split("([0-9]+)", key)] + return alphanum_key + + +def sorted_alphanumeric(data): + return sorted(data, key=get_alphanum_key) + + +def deploy(config: DeployConfig, session: SnowflakeSession): + logger.info( + "starting deploy", + dry_run=config.dry_run, + snowflake_account=session.account, + default_role=session.role, + default_warehouse=session.warehouse, + default_database=session.database, + default_schema=session.schema, + change_history_table=session.change_history_table.fully_qualified, + ) + + ( + versioned_scripts, + r_scripts_checksum, + max_published_version, + ) = session.get_script_metadata( + create_change_history_table=config.create_change_history_table, + dry_run=config.dry_run, + ) + + max_published_version = get_alphanum_key(max_published_version) + + # Find all scripts in the root folder (recursively) and sort them correctly + all_scripts = get_all_scripts_recursively( + root_directory=config.root_folder, + ) + all_script_names = list(all_scripts.keys()) + # Sort scripts such that versioned scripts get applied first and then the repeatable ones. + all_script_names_sorted = ( + sorted_alphanumeric([script for script in all_script_names if script[0] == "v"]) + + sorted_alphanumeric( + [script for script in all_script_names if script[0] == "r"] + ) + + sorted_alphanumeric( + [script for script in all_script_names if script[0] == "a"] + ) + ) + + scripts_skipped = 0 + scripts_applied = 0 + + # Loop through each script in order and apply any required changes + for script_name in all_script_names_sorted: + script = all_scripts[script_name] + script_log = logger.bind( + # The logging keys will be sorted alphabetically. + # Appending 'a' is a lazy way to get the script name to appear at the start of the log + a_script_name=script.name, + script_version=getattr(script, "version", "N/A"), + ) + # Always process with jinja engine + jinja_processor = JinjaTemplateProcessor( + project_root=config.root_folder, modules_folder=config.modules_folder + ) + content = jinja_processor.render( + jinja_processor.relpath(script.file_path), + config.config_vars, + ) + + checksum_current = hashlib.sha224(content.encode("utf-8")).hexdigest() + + # Apply a versioned-change script only if the version is newer than the most recent change in the database + # Apply any other scripts, i.e. 
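The effect of this natural ordering, with hypothetical script names:

```python
# Numeric runs compare as integers, so "V10" sorts after "V9" rather
# than landing between "V1" and "V2" as a plain string sort would.
from schemachange.deploy import sorted_alphanumeric

names = ["V10__c.sql", "V9__b.sql", "V1__a.sql"]
assert sorted_alphanumeric(names) == ["V1__a.sql", "V9__b.sql", "V10__c.sql"]
```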
repeatable scripts, irrespective of the most recent change in the database + if script.type == "V": + script_metadata = versioned_scripts.get(script.name) + + if ( + max_published_version != "" + and get_alphanum_key(script.version) <= max_published_version + ): + if script_metadata is None: + script_log.debug( + "Skipping versioned script because it's older than the most recently applied change", + max_published_version=max_published_version, + ) + scripts_skipped += 1 + continue + else: + script_log.debug( + "Script has already been applied", + max_published_version=str(max_published_version), + ) + if script_metadata["checksum"] != checksum_current: + script_log.info("Script checksum has drifted since application") + + scripts_skipped += 1 + continue + + # Apply only R scripts where the checksum changed compared to the last execution of snowchange + if script.type == "R": + # check if R file was already executed + if (r_scripts_checksum is not None) and script.name in r_scripts_checksum: + checksum_last = r_scripts_checksum[script.name][0] + else: + checksum_last = "" + + # check if there is a change of the checksum in the script + if checksum_current == checksum_last: + script_log.debug( + "Skipping change script because there is no change since the last execution" + ) + scripts_skipped += 1 + continue + + session.apply_change_script( + script=script, + script_content=content, + dry_run=config.dry_run, + logger=script_log, + ) + + scripts_applied += 1 + + logger.info( + "Completed successfully", + scripts_applied=scripts_applied, + scripts_skipped=scripts_skipped, + ) diff --git a/schemachange/redact_config_secrets.py b/schemachange/redact_config_secrets.py new file mode 100644 index 00000000..cc8e4e65 --- /dev/null +++ b/schemachange/redact_config_secrets.py @@ -0,0 +1,66 @@ +from __future__ import annotations + +import copy +import warnings +from typing import Callable + +import structlog +from structlog import PrintLogger + + +def get_redact_config_secrets_processor( + config_secrets: set[str], +) -> Callable[[PrintLogger, str, dict], dict]: + def redact_config_secrets_processor( + _: PrintLogger, __: str, event_dict: dict + ) -> dict: + def redact_dict(level: int, sub_event_dict: dict) -> dict: + if level > 6: + warnings.warn( + "Unable to redact deeply nested secrets in log: %(event)s" + % {"event": event_dict["event"]} + ) + return sub_event_dict + for sub_k, sub_v in sub_event_dict.items(): + if isinstance(sub_v, dict): + sub_event_dict[sub_k] = redact_dict( + level=level + 1, sub_event_dict=sub_v + ) + elif isinstance(sub_v, str): + for secret in config_secrets: + if secret in sub_v: + sub_event_dict[sub_k] = sub_event_dict[sub_k].replace( + secret, "*" * len(secret) + ) + elif isinstance(sub_v, int): + for secret in config_secrets: + if secret in str(sub_v): + sub_event_dict[sub_k] = str(sub_event_dict[sub_k]).replace( + secret, "*" * len(secret) + ) + else: + warnings.warn( + "Unable to redact %(type)s log arguments in log: %(event)s" + % {"type": type(sub_v).__name__, "event": event_dict["event"]} + ) + return sub_event_dict + return sub_event_dict + + return redact_dict(level=0, sub_event_dict=copy.deepcopy(event_dict)) + + return redact_config_secrets_processor + + +def redact_config_secrets(config_secrets: set[str]) -> None: + if not config_secrets: + return + + cfg = structlog.get_config() + redact_config_secrets_processor = get_redact_config_secrets_processor( + config_secrets=config_secrets + ) + + new_processors = cfg["processors"] + 
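The repeatable-script branch above reduces to a checksum comparison. A sketch with illustrative values:

```python
# An R__ script is re-applied only when its rendered content hashes
# differently from the checksum stored for its last successful run.
import hashlib

rendered = "CREATE OR REPLACE VIEW MY_VIEW AS SELECT 1;"
checksum_current = hashlib.sha224(rendered.encode("utf-8")).hexdigest()
checksum_last = checksum_current  # pretend the history table holds this
assert checksum_current == checksum_last  # unchanged, so it is skipped
```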
new_processors.insert(len(cfg["processors"]) - 1, redact_config_secrets_processor) + + structlog.configure(processors=new_processors) diff --git a/schemachange/session/Credential.py b/schemachange/session/Credential.py new file mode 100644 index 00000000..b39e180b --- /dev/null +++ b/schemachange/session/Credential.py @@ -0,0 +1,112 @@ +from __future__ import annotations + +import dataclasses +import os +from abc import ABC +from typing import Literal, Union + +import structlog + +from schemachange.session.utils import ( + get_snowflake_password, + get_private_key_bytes, + get_oauth_token, +) + + +@dataclasses.dataclass(kw_only=True, frozen=True) +class Credential(ABC): + authenticator: str + + +@dataclasses.dataclass(kw_only=True, frozen=True) +class OauthCredential(Credential): + authenticator: Literal["oauth"] = "oauth" + token: str + + +@dataclasses.dataclass(kw_only=True, frozen=True) +class PasswordCredential(Credential): + authenticator: Literal["snowflake"] = "snowflake" + password: str + + +@dataclasses.dataclass(kw_only=True, frozen=True) +class PrivateKeyCredential(Credential): + authenticator: Literal["snowflake"] = "snowflake" + private_key: bytes + + +@dataclasses.dataclass(kw_only=True, frozen=True) +class ExternalBrowserCredential(Credential): + authenticator: Literal["externalbrowser"] = "externalbrowser" + password: str | None = None + + +@dataclasses.dataclass(kw_only=True, frozen=True) +class OktaCredential(Credential): + authenticator: str + password: str + + +SomeCredential = Union[ + OauthCredential, + PasswordCredential, + ExternalBrowserCredential, + OktaCredential, + PrivateKeyCredential, +] + + +def credential_factory( + logger: structlog.BoundLogger, + oauth_config: dict | None = None, +) -> SomeCredential: + snowflake_authenticator = os.getenv("SNOWFLAKE_AUTHENTICATOR", default="snowflake") + + # OAuth based authentication + if snowflake_authenticator.lower() == "oauth": + logger.debug("Proceeding with Oauth Access Token authentication") + return OauthCredential(token=get_oauth_token(oauth_config)) + + # External Browser based SSO + if snowflake_authenticator.lower() == "externalbrowser": + logger.debug("Proceeding with External Browser authentication") + return ExternalBrowserCredential() + + snowflake_password = get_snowflake_password() + + # IDP based Authentication, limited to Okta + if snowflake_authenticator.lower()[:8] == "https://": + logger.debug( + "Proceeding with Okta authentication", okta_endpoint=snowflake_authenticator + ) + return OktaCredential( + authenticator=snowflake_authenticator, password=snowflake_password + ) + + if snowflake_authenticator.lower() != "snowflake": + logger.debug( + "Supplied authenticator is not supported authenticator option. Choose from snowflake, " + "externalbrowser, oauth, https://.okta.com. " + "Using default value = 'snowflake'", + snowflake_authenticator=snowflake_authenticator, + ) + + if snowflake_password: + logger.debug("Proceeding with password authentication") + + return PasswordCredential(password=snowflake_password) + + if os.getenv("SNOWFLAKE_PRIVATE_KEY_PATH", ""): + logger.debug("Proceeding with private key authentication") + + return PrivateKeyCredential(private_key=get_private_key_bytes()) + + raise NameError( + "Missing environment variable(s). \n" + "SNOWFLAKE_PASSWORD must be defined for password authentication. \n" + "SNOWFLAKE_PRIVATE_KEY_PATH and (optional) SNOWFLAKE_PRIVATE_KEY_PASSPHRASE " + "must be defined for private key authentication. 
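A hypothetical use of the factory, assuming only `SNOWFLAKE_PASSWORD` is exported (the value is a dummy for illustration):

```python
import os
from unittest import mock

import structlog

from schemachange.session.Credential import PasswordCredential, credential_factory

# With SNOWFLAKE_AUTHENTICATOR unset, the default "snowflake" path runs
# and the exported password wins.
env = {"SNOWFLAKE_PASSWORD": "hunter2"}
with mock.patch.dict(os.environ, env, clear=True):
    credential = credential_factory(logger=structlog.getLogger(__name__))
assert credential == PasswordCredential(password="hunter2")
```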
\n" + "SNOWFLAKE_AUTHENTICATOR must be defined is using Oauth, OKTA or external Browser Authentication." + ) diff --git a/schemachange/session/Script.py b/schemachange/session/Script.py new file mode 100644 index 00000000..bfa29a87 --- /dev/null +++ b/schemachange/session/Script.py @@ -0,0 +1,130 @@ +from __future__ import annotations + +import dataclasses +import itertools +import re +from abc import ABC +from pathlib import Path +from typing import ( + Literal, + ClassVar, + TypeVar, + Pattern, +) + +import structlog + +logger = structlog.getLogger(__name__) +T = TypeVar("T", bound="Script") + + +@dataclasses.dataclass(kw_only=True, frozen=True) +class Script(ABC): + pattern: ClassVar[Pattern[str]] + type: ClassVar[Literal["V", "R", "A"]] + name: str + file_path: Path + description: str + + @staticmethod + def get_script_name(file_path: Path) -> str: + """Script name is the filename without any jinja extension""" + if file_path.suffixes[-1].upper() == ".JINJA": + return file_path.stem + return file_path.name + + @classmethod + def from_path(cls, file_path: Path, **kwargs) -> T: + logger.debug("script found", class_name=cls.__name__, file_path=str(file_path)) + + # script name is the filename without any jinja extension + script_name = cls.get_script_name(file_path=file_path) + name_parts = cls.pattern.search(file_path.name.strip()) + description = name_parts.group("description").replace("_", " ").capitalize() + # noinspection PyArgumentList + return cls( + name=script_name, file_path=file_path, description=description, **kwargs + ) + + +@dataclasses.dataclass(kw_only=True, frozen=True) +class VersionedScript(Script): + pattern: ClassVar[re.Pattern[str]] = re.compile( + r"^(V)(?P.+?)?__(?P.+?)\.", re.IGNORECASE + ) + type: ClassVar[Literal["V"]] = "V" + version: str + + @classmethod + def from_path(cls: T, file_path: Path, **kwargs) -> T: + name_parts = cls.pattern.search(file_path.name.strip()) + + return super().from_path( + file_path=file_path, version=name_parts.group("version") + ) + + +@dataclasses.dataclass(kw_only=True, frozen=True) +class RepeatableScript(Script): + pattern: ClassVar[re.Pattern[str]] = re.compile( + r"^(R)__(?P.+?)\.", re.IGNORECASE + ) + type: ClassVar[Literal["R"]] = "R" + + +@dataclasses.dataclass(kw_only=True, frozen=True) +class AlwaysScript(Script): + pattern: ClassVar[re.Pattern[str]] = re.compile( + r"^(A)__(?P.+?)\.", re.IGNORECASE + ) + type: ClassVar[Literal["A"]] = "A" + + +def script_factory( + file_path: Path, +) -> T | None: + if VersionedScript.pattern.search(file_path.name.strip()) is not None: + return VersionedScript.from_path(file_path=file_path) + + elif RepeatableScript.pattern.search(file_path.name.strip()) is not None: + return RepeatableScript.from_path(file_path=file_path) + + elif AlwaysScript.pattern.search(file_path.name.strip()) is not None: + return AlwaysScript.from_path(file_path=file_path) + + logger.debug("ignoring non-change file", file_path=str(file_path)) + + +def get_all_scripts_recursively(root_directory: Path): + all_files: dict[str, T] = dict() + all_versions = list() + # Walk the entire directory structure recursively + file_paths = itertools.chain( + root_directory.rglob("*.sql"), root_directory.rglob("*.sql.jinja") + ) + + for file_path in file_paths: + script = script_factory(file_path=file_path) + if script is None: + continue + + # Throw an error if the script_name already exists + if script.name.lower() in all_files: + raise ValueError( + f"The script name {script.name} exists more than once (" + f"first_instance 
{str(all_files[script.name.lower()].file_path)}, " + f"second instance {str(script.file_path)})" + ) + + all_files[script.name.lower()] = script + + # Throw an error if the same version exists more than once + if script.type == "V": + if script.version in all_versions: + raise ValueError( + f"The script version {script.version} exists more than once " + f"(second instance {str(script.file_path)})" + ) + all_versions.append(script.version) + + return all_files diff --git a/schemachange/session/SnowflakeSession.py b/schemachange/session/SnowflakeSession.py new file mode 100644 index 00000000..f6c88be6 --- /dev/null +++ b/schemachange/session/SnowflakeSession.py @@ -0,0 +1,367 @@ +from __future__ import annotations + +import hashlib +import time +from collections import defaultdict +from dataclasses import asdict +from textwrap import dedent, indent + +import snowflake.connector +import structlog + +from schemachange.config.ChangeHistoryTable import ChangeHistoryTable +from schemachange.config.DeployConfig import DeployConfig +from schemachange.session.Credential import SomeCredential, credential_factory +from schemachange.session.Script import VersionedScript, RepeatableScript, AlwaysScript + + +class SnowflakeSession: + user: str + account: str + role: str + warehouse: str + database: str | None + schema: str | None + query_tag: str | None + autocommit: bool + change_history_table: ChangeHistoryTable + logger: structlog.BoundLogger + session_parameters: dict[str, str] + conn: snowflake.connector.SnowflakeConnection + + """ + Manages Snowflake Interactions and authentication + """ + + def __init__( + self, + snowflake_user: str, + snowflake_account: str, + snowflake_role: str, + snowflake_warehouse: str, + schemachange_version: str, + application: str, + credential: SomeCredential, + change_history_table: ChangeHistoryTable, + logger: structlog.BoundLogger, + autocommit: bool = False, + snowflake_database: str | None = None, + snowflake_schema: str | None = None, + query_tag: str | None = None, + ): + self.user = snowflake_user + self.account = snowflake_account + self.role = snowflake_role + self.warehouse = snowflake_warehouse + self.database = snowflake_database + self.schema = snowflake_schema + self.autocommit = autocommit + self.change_history_table = change_history_table + self.logger = logger + + self.session_parameters = {"QUERY_TAG": f"schemachange {schemachange_version}"} + if query_tag: + self.session_parameters["QUERY_TAG"] += f";{query_tag}" + + self.con = snowflake.connector.connect( + user=self.user, + account=self.account, + role=self.role, + warehouse=self.warehouse, + database=self.database, + schema=self.schema, + application=application, + session_parameters=self.session_parameters, + **asdict(credential), + ) + print(f"Current session ID: {self.con.session_id}") + + if not self.autocommit: + self.con.autocommit(False) + + def __del__(self): + if hasattr(self, "con"): + self.con.close() + + def execute_snowflake_query(self, query: str, logger: structlog.BoundLogger): + logger.debug( + "Executing query", + query=indent(query, prefix="\t"), + ) + try: + res = self.con.execute_string(query) + if not self.autocommit: + self.con.commit() + return res + except Exception as e: + if not self.autocommit: + self.con.rollback() + raise e + + def fetch_change_history_metadata(self) -> dict: + # This should only ever return 0 or 1 rows + query = f"""\ + SELECT + CREATED, + LAST_ALTERED + FROM {self.change_history_table.database_name}.INFORMATION_SCHEMA.TABLES + WHERE TABLE_SCHEMA = 
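The `QUERY_TAG` composition above, replayed with illustrative values:

```python
# The schemachange version is always recorded; a user-supplied tag is
# appended after a semicolon.
schemachange_version, query_tag = "4.0.0", "release-42"  # hypothetical
session_parameters = {"QUERY_TAG": f"schemachange {schemachange_version}"}
if query_tag:
    session_parameters["QUERY_TAG"] += f";{query_tag}"
assert session_parameters["QUERY_TAG"] == "schemachange 4.0.0;release-42"
```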
REPLACE('{self.change_history_table.schema_name}','\"','') + AND TABLE_NAME = REPLACE('{self.change_history_table.table_name}','\"','') + """ + results = self.execute_snowflake_query(query=dedent(query), logger=self.logger) + + # Collect all the results into a list + change_history_metadata = dict() + for cursor in results: + for row in cursor: + change_history_metadata["created"] = row[0] + change_history_metadata["last_altered"] = row[1] + + return change_history_metadata + + def change_history_schema_exists(self) -> bool: + query = f"""\ + SELECT + COUNT(1) + FROM {self.change_history_table.database_name}.INFORMATION_SCHEMA.SCHEMATA + WHERE SCHEMA_NAME = REPLACE('{self.change_history_table.schema_name}','\"','') + """ + results = self.execute_snowflake_query(dedent(query), logger=self.logger) + for cursor in results: + for row in cursor: + return row[0] > 0 + + def create_change_history_schema(self, dry_run: bool) -> None: + query = f"CREATE SCHEMA IF NOT EXISTS {self.change_history_table.schema_name}" + if dry_run: + self.logger.debug( + "Running in dry-run mode. Skipping execution.", + query=indent(dedent(query), prefix="\t"), + ) + else: + self.execute_snowflake_query(dedent(query), logger=self.logger) + + def create_change_history_table(self, dry_run: bool) -> None: + query = f"""\ + CREATE TABLE IF NOT EXISTS {self.change_history_table.fully_qualified} ( + VERSION VARCHAR, + DESCRIPTION VARCHAR, + SCRIPT VARCHAR, + SCRIPT_TYPE VARCHAR, + CHECKSUM VARCHAR, + EXECUTION_TIME NUMBER, + STATUS VARCHAR, + INSTALLED_BY VARCHAR, + INSTALLED_ON TIMESTAMP_LTZ + ) + """ + if dry_run: + self.logger.debug( + "Running in dry-run mode. Skipping execution.", + query=indent(dedent(query), prefix="\t"), + ) + else: + self.execute_snowflake_query(dedent(query), logger=self.logger) + self.logger.info( + f"Created change history table {self.change_history_table.fully_qualified}" + ) + + def change_history_table_exists( + self, create_change_history_table: bool, dry_run: bool + ) -> bool: + change_history_metadata = self.fetch_change_history_metadata() + if change_history_metadata: + self.logger.info( + f"Using existing change history table {self.change_history_table.fully_qualified}", + last_altered=change_history_metadata["last_altered"], + ) + return True + elif create_change_history_table: + schema_exists = self.change_history_schema_exists() + if not schema_exists: + self.create_change_history_schema(dry_run=dry_run) + self.create_change_history_table(dry_run=dry_run) + if dry_run: + return False + self.logger.info("Created change history table") + return True + else: + raise ValueError( + f"Unable to find change history table {self.change_history_table.fully_qualified}" + ) + + def get_script_metadata( + self, create_change_history_table: bool, dry_run: bool + ) -> tuple[ + dict[str, dict[str, str | int]] | None, + dict[str, list[str]] | None, + str | int | None, + ]: + change_history_table_exists = self.change_history_table_exists( + create_change_history_table=create_change_history_table, + dry_run=dry_run, + ) + if not change_history_table_exists: + return None, None, None + + change_history, max_published_version = self.fetch_versioned_scripts() + r_scripts_checksum = self.fetch_repeatable_scripts() + + self.logger.info( + "Max applied change script version %(max_published_version)s" + % { + "max_published_version": max_published_version + if max_published_version != "" + else "None" + } + ) + return change_history, r_scripts_checksum, max_published_version + + def 
fetch_repeatable_scripts(self) -> dict[str, list[str]]: + query = f"""\ + SELECT DISTINCT + SCRIPT AS SCRIPT_NAME, + FIRST_VALUE(CHECKSUM) OVER ( + PARTITION BY SCRIPT + ORDER BY INSTALLED_ON DESC + ) AS CHECKSUM + FROM {self.change_history_table.fully_qualified} + WHERE SCRIPT_TYPE = 'R' + AND STATUS = 'Success' + """ + results = self.execute_snowflake_query(dedent(query), logger=self.logger) + + # Collect all the results into a dict + script_checksums: dict[str, list[str]] = defaultdict(list) + for cursor in results: + for script_name, checksum in cursor: + script_checksums[script_name].append(checksum) + return script_checksums + + def fetch_versioned_scripts( + self, + ) -> tuple[dict[str, dict[str, str | int]], str | int | None]: + query = f"""\ + SELECT VERSION, SCRIPT, CHECKSUM + FROM {self.change_history_table.fully_qualified} + WHERE SCRIPT_TYPE = 'V' + ORDER BY INSTALLED_ON DESC -- TODO: Why not order by version? + """ + results = self.execute_snowflake_query(dedent(query), logger=self.logger) + + # Collect all the results into a list + versioned_scripts: dict[str, dict[str, str | int]] = defaultdict(dict) + versions: list[str | int] = [] + for cursor in results: + for version, script, checksum in cursor: + versions.append(version) + versioned_scripts[script] = { + "version": version, + "script": script, + "checksum": checksum, + } + + return versioned_scripts, versions[0] if versions else None + + def reset_session(self, logger: structlog.BoundLogger): + # These items are optional, so we can only reset the ones with values + reset_query = [] + if self.role: + reset_query.append(f"USE ROLE IDENTIFIER('{self.role}');") + if self.warehouse: + reset_query.append(f"USE WAREHOUSE IDENTIFIER('{self.warehouse}');") + if self.database: + reset_query.append(f"USE DATABASE IDENTIFIER('{self.database}');") + if self.schema: + reset_query.append(f"USE SCHEMA IDENTIFIER('{self.schema}');") + + self.execute_snowflake_query("\n".join(reset_query), logger=logger) + + def reset_query_tag(self, logger: structlog.BoundLogger, extra_tag=None): + query_tag = self.session_parameters["QUERY_TAG"] + if extra_tag: + query_tag += f";{extra_tag}" + + self.execute_snowflake_query( + f"ALTER SESSION SET QUERY_TAG = '{query_tag}'", logger=logger + ) + + def apply_change_script( + self, + script: VersionedScript | RepeatableScript | AlwaysScript, + script_content: str, + dry_run: bool, + logger: structlog.BoundLogger, + ) -> None: + if dry_run: + logger.debug("Running in dry-run mode. 
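Note the caveat flagged in the TODO above: rows come back ordered by `INSTALLED_ON`, so the first row is only the true maximum when scripts were applied in version order. A sketch of how `max_published_version` falls out, with made-up rows:

```python
# Two history rows, newest INSTALLED_ON first, as the query returns them.
rows = [("1.10", "V1.10__c.sql", "ccc"), ("1.9", "V1.9__b.sql", "bbb")]
versions = [version for version, _script, _checksum in rows]
max_published_version = versions[0] if versions else None
assert max_published_version == "1.10"
```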
Skipping execution") + return + logger.info("Applying change script") + # Define a few other change related variables + checksum = hashlib.sha224(script_content.encode("utf-8")).hexdigest() + execution_time = 0 + status = "Success" + + # Execute the contents of the script + if len(script_content) > 0: + start = time.time() + self.reset_session(logger=logger) + self.reset_query_tag(extra_tag=script.name, logger=logger) + try: + self.execute_snowflake_query(query=script_content, logger=logger) + except Exception as e: + raise Exception(f"Failed to execute {script.name}") from e + self.reset_query_tag(logger=logger) + self.reset_session(logger=logger) + end = time.time() + execution_time = round(end - start) + + # Compose and execute the insert statement to the log file + query = f"""\ + INSERT INTO {self.change_history_table.fully_qualified} ( + VERSION, + DESCRIPTION, + SCRIPT, + SCRIPT_TYPE, + CHECKSUM, + EXECUTION_TIME, + STATUS, + INSTALLED_BY, + INSTALLED_ON + ) VALUES ( + '{getattr(script, "version", "")}', + '{script.description}', + '{script.name}', + '{script.type}', + '{checksum}', + {execution_time}, + '{status}', + '{self.user}', + CURRENT_TIMESTAMP + ); + """ + self.execute_snowflake_query(dedent(query), logger=logger) + + +def get_session_from_config( + config: DeployConfig, + logger: structlog.BoundLogger, + schemachange_version: str, + snowflake_application_name: str, +) -> SnowflakeSession: + credential = credential_factory(logger=logger, oauth_config=config.oauth_config) + return SnowflakeSession( + snowflake_user=config.snowflake_user, + snowflake_account=config.snowflake_account, + snowflake_role=config.snowflake_role, + snowflake_warehouse=config.snowflake_warehouse, + schemachange_version=schemachange_version, + application=snowflake_application_name, + credential=credential, + change_history_table=config.change_history_table, + logger=logger, + autocommit=config.autocommit, + snowflake_database=config.snowflake_database, + snowflake_schema=config.snowflake_schema, + query_tag=config.query_tag, + ) diff --git a/schemachange/session/__init__.py b/schemachange/session/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/schemachange/session/utils.py b/schemachange/session/utils.py new file mode 100644 index 00000000..9cc58f38 --- /dev/null +++ b/schemachange/session/utils.py @@ -0,0 +1,85 @@ +from __future__ import annotations + +import json +import os +import warnings + +import requests +import structlog +from cryptography.hazmat.backends import default_backend +from cryptography.hazmat.primitives import serialization + +logger = structlog.getLogger(__name__) + + +def get_snowflake_password() -> str | None: + snowflake_password = None + if os.getenv("SNOWFLAKE_PASSWORD") is not None and os.getenv("SNOWFLAKE_PASSWORD"): + snowflake_password = os.getenv("SNOWFLAKE_PASSWORD") + + # Check legacy/deprecated env variable + if os.getenv("SNOWSQL_PWD") is not None and os.getenv("SNOWSQL_PWD"): + if snowflake_password: + warnings.warn( + "Environment variables SNOWFLAKE_PASSWORD and SNOWSQL_PWD " + "are both present, using SNOWFLAKE_PASSWORD", + DeprecationWarning, + ) + else: + warnings.warn( + "The SNOWSQL_PWD environment variable is deprecated and " + "will be removed in a later version of schemachange. 
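The password fallback above, exercised under the assumption that only the deprecated variable is set:

```python
import os
import warnings
from unittest import mock

from schemachange.session.utils import get_snowflake_password

# SNOWFLAKE_PASSWORD always wins; SNOWSQL_PWD is honoured, with a
# DeprecationWarning, only when it is the sole variable present.
with mock.patch.dict(os.environ, {"SNOWSQL_PWD": "legacy"}, clear=True):
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", DeprecationWarning)
        assert get_snowflake_password() == "legacy"
```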
" + "Please use SNOWFLAKE_PASSWORD instead.", + DeprecationWarning, + ) + snowflake_password = os.getenv("SNOWSQL_PWD") + return snowflake_password + + +def get_private_key_password() -> bytes | None: + private_key_password = os.getenv("SNOWFLAKE_PRIVATE_KEY_PASSPHRASE", "") + + if private_key_password: + return private_key_password.encode() + + logger.debug( + "No private key passphrase provided. Assuming the key is not encrypted." + ) + + return None + + +def get_private_key_bytes() -> bytes: + private_key_password = get_private_key_password() + with open(os.environ["SNOWFLAKE_PRIVATE_KEY_PATH"], "rb") as key: + p_key = serialization.load_pem_private_key( + key.read(), + password=private_key_password, + backend=default_backend(), + ) + + return p_key.private_bytes( + encoding=serialization.Encoding.DER, + format=serialization.PrivateFormat.PKCS8, + encryption_algorithm=serialization.NoEncryption(), + ) + + +def get_oauth_token(oauth_config: dict): + req_info = { + "url": oauth_config["token-provider-url"], + "headers": oauth_config["token-request-headers"], + "data": oauth_config["token-request-payload"], + } + token_name = oauth_config["token-response-name"] + response = requests.post(**req_info) + response_dict = json.loads(response.text) + try: + return response_dict[token_name] + except KeyError: + keys = ", ".join(response_dict.keys()) + errormessage = f"Response Json contains keys: {keys} \n but not {token_name}" + # if there is an error passed with the response include that + if "error_description" in response_dict.keys(): + errormessage = f"{errormessage}\n error description: {response_dict['error_description']}" + raise KeyError(errormessage) diff --git a/setup.cfg b/setup.cfg index 71e1fd4f..aee64841 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,6 +1,6 @@ [metadata] name = schemachange -version = 3.7.0 +version = 4.0.0 description = A Database Change Management tool for Snowflake long_description = file: README.md long_description_content_type = text/markdown @@ -15,11 +15,17 @@ classifiers = Programming Language :: Python :: 3 :: Only [options] -packages = schemachange +packages = + schemachange + schemachange.config + schemachange.session install_requires = + cryptography jinja2~=3.0 pyyaml~=6.0 + requests snowflake-connector-python>=2.8,<4.0 + structlog~=24.1.0 python_requires = >=3.8 include_package_data = True diff --git a/tests/config/schemachange-config.yml b/tests/config/schemachange-config.yml new file mode 100644 index 00000000..73a5e56a --- /dev/null +++ b/tests/config/schemachange-config.yml @@ -0,0 +1,30 @@ +config-version: 1 +root-folder: 'root-folder-from-yaml' +modules-folder: 'modules-folder-from-yaml' +snowflake-account: 'snowflake-account-from-yaml' +snowflake-user: 'snowflake-user-from-yaml' +snowflake-role: 'snowflake-role-from-yaml' +snowflake-warehouse: 'snowflake-warehouse-from-yaml' +snowflake-database: 'snowflake-database-from-yaml' +snowflake-schema: 'snowflake-schema-from-yaml' +change-history-table: 'change-history-table-from-yaml' +vars: + var1: 'from_yaml' + var2: 'also_from_yaml' +create-change-history-table: false +autocommit: false +verbose: false +dry-run: false +query-tag: 'query-tag-from-yaml' +oauthconfig: + token-provider-url: 'token-provider-url-from-yaml' + token-response-name: 'token-response-name-from-yaml' + token-request-headers: + Content-Type: 'Content-Type-from-yaml' + User-Agent: 'User-Agent-from-yaml' + token-request-payload: + client_id: 'id-from-yaml' + username: 'username-from-yaml' + password: 'password-from-yaml' + 
grant_type: 'type-from-yaml' + scope: 'scope-from-yaml' diff --git a/tests/config/test_Config.py b/tests/config/test_Config.py new file mode 100644 index 00000000..eca2e269 --- /dev/null +++ b/tests/config/test_Config.py @@ -0,0 +1,280 @@ +from __future__ import annotations + +from pathlib import Path +from unittest import mock + +import pytest + +from schemachange.config.BaseConfig import BaseConfig +from schemachange.config.ChangeHistoryTable import ChangeHistoryTable +from schemachange.config.DeployConfig import DeployConfig +from schemachange.config.RenderConfig import RenderConfig +from schemachange.config.utils import get_config_secrets + + +@pytest.fixture +@mock.patch("pathlib.Path.is_dir", return_value=True) +def yaml_config(_) -> DeployConfig: + return DeployConfig.factory( + config_file_path=Path(__file__).parent.parent.parent + / "demo" + / "basics_demo" + / "schemachange-config.yml", + root_folder=Path(__file__).parent.parent.parent / "demo" / "basics_demo", + modules_folder=Path(__file__).parent.parent.parent / "demo" / "basics_demo", + config_vars={"var1": "yaml_vars"}, + snowflake_account="yaml_snowflake_account", + snowflake_user="yaml_snowflake_user", + snowflake_role="yaml_snowflake_role", + snowflake_warehouse="yaml_snowflake_warehouse", + snowflake_database="yaml_snowflake_database", + snowflake_schema="yaml_snowflake_schema", + change_history_table="yaml_change_history_table", + create_change_history_table=True, + autocommit=True, + dry_run=True, + query_tag="yaml_query_tag", + oauth_config={"oauth": "yaml_oauth"}, + ) + + +class TestGetConfigSecrets: + def test_given_empty_config_should_not_error(self): + get_config_secrets(config_vars={}) + + def test_given_none_should_not_error(self): + get_config_secrets(None) + + @pytest.mark.parametrize( + "config_vars, secret", + [ + ({"secret": "secret_val1"}, "secret_val1"), + ({"SECret": "secret_val2"}, "secret_val2"), + ({"secret_key": "secret_val3"}, "secret_val3"), + ({"s3_bucket_secret": "secret_val4"}, "secret_val4"), + ({"s3SecretKey": "secret_val5"}, "secret_val5"), + ({"nested": {"s3_bucket_secret": "secret_val6"}}, "secret_val6"), + ], + ) + def test_given__vars_with_keys_should_extract_secret(self, config_vars, secret): + results = get_config_secrets(config_vars) + assert secret in results + + def test_given_vars_with_secrets_key_then_all_children_should_be_treated_as_secrets( + self, + ): + config_vars = { + "secrets": { + "database_name": "database_name_val", + "schema_name": "schema_name_val", + "nested_secrets": {"SEC_ONE": "SEC_ONE_VAL"}, + } + } + results = get_config_secrets(config_vars=config_vars) + + assert len(results) == 3 + assert "database_name_val" in results + assert "schema_name_val" in results + assert "SEC_ONE_VAL" in results + + def test_given_vars_with_nested_secrets_key_then_all_children_should_be_treated_as_secrets( + self, + ): + config_vars = { + "nested": { + "secrets": { + "database_name": "database_name_val", + "schema_name": "schema_name_val", + "nested": {"SEC_ONE": "SEC_ONE_VAL"}, + } + } + } + + results = get_config_secrets(config_vars) + + assert len(results) == 3 + assert "database_name_val" in results + assert "schema_name_val" in results + assert "SEC_ONE_VAL" in results + + def test_given_vars_with_same_secret_twice_then_only_extracted_once(self): + config_vars = { + "secrets": { + "database_name": "SECRET_VALUE", + "schema_name": "SECRET_VALUE", + "nested_secrets": {"SEC_ONE": "SECRET_VALUE"}, + } + } + + results = get_config_secrets(config_vars) + + assert len(results) == 1 + 
assert "SECRET_VALUE" in results + + +class TestTable: + @pytest.mark.parametrize( + "table_str, expected", + [ + ( + "DATABASE_NAME.SCHEMA_NAME.TABLE_NAME", + ChangeHistoryTable( + table_name="TABLE_NAME", + schema_name="SCHEMA_NAME", + database_name="DATABASE_NAME", + ), + ), + ( + "SCHEMA_NAME.TABLE_NAME", + ChangeHistoryTable( + table_name="TABLE_NAME", + schema_name="SCHEMA_NAME", + database_name="METADATA", + ), + ), + ( + "TABLE_NAME", + ChangeHistoryTable( + table_name="TABLE_NAME", + schema_name="SCHEMACHANGE", + database_name="METADATA", + ), + ), + ], + ) + def test_from_str_happy_path(self, table_str: str, expected: ChangeHistoryTable): + result = ChangeHistoryTable.from_str(table_str) + assert result == expected + + def test_from_str_exception(self): + with pytest.raises(ValueError) as e: + ChangeHistoryTable.from_str("FOUR.THREE.TWO.ONE") + + assert "Invalid change history table name:" in str(e.value) + + @pytest.mark.parametrize( + "table, expected", + [ + ( + ChangeHistoryTable( + table_name="TABLE_NAME", + schema_name="SCHEMA_NAME", + database_name="DATABASE_NAME", + ), + "DATABASE_NAME.SCHEMA_NAME.TABLE_NAME", + ), + ( + ChangeHistoryTable( + table_name="TABLE_NAME", + schema_name="SCHEMA_NAME", + database_name="METADATA", + ), + "METADATA.SCHEMA_NAME.TABLE_NAME", + ), + ( + ChangeHistoryTable( + table_name="TABLE_NAME", + schema_name="SCHEMACHANGE", + database_name="METADATA", + ), + "METADATA.SCHEMACHANGE.TABLE_NAME", + ), + ], + ) + def test_fully_qualified(self, table: ChangeHistoryTable, expected: str): + result = table.fully_qualified + assert result == expected + + +class TestConfig: + @mock.patch("pathlib.Path.is_dir", side_effect=[False]) + def test_invalid_root_folder(self, _): + with pytest.raises(Exception) as e_info: + DeployConfig.factory( + config_file_path=Path("some_config_file_name"), + root_folder="some_root_folder_name", + modules_folder="some_modules_folder_name", + config_vars={"some": "config_vars"}, + snowflake_account="some_snowflake_account", + snowflake_user="some_snowflake_user", + snowflake_role="some_snowflake_role", + snowflake_warehouse="some_snowflake_warehouse", + snowflake_database="some_snowflake_database", + snowflake_schema="some_snowflake_schema", + change_history_table="some_history_table", + query_tag="some_query_tag", + oauth_config={"some": "values"}, + ) + e_info_value = str(e_info.value) + assert "Path is not valid directory: some_root_folder_name" in e_info_value + + @mock.patch("pathlib.Path.is_dir", side_effect=[True, False]) + def test_invalid_modules_folder(self, _): + with pytest.raises(Exception) as e_info: + DeployConfig.factory( + config_file_path=Path("some_config_file_name"), + root_folder="some_root_folder_name", + modules_folder="some_modules_folder_name", + config_vars={"some": "config_vars"}, + snowflake_account="some_snowflake_account", + snowflake_user="some_snowflake_user", + snowflake_role="some_snowflake_role", + snowflake_warehouse="some_snowflake_warehouse", + snowflake_database="some_snowflake_database", + snowflake_schema="some_snowflake_schema", + change_history_table="some_history_table", + query_tag="some_query_tag", + oauth_config={"some": "values"}, + ) + e_info_value = str(e_info.value) + assert "Path is not valid directory: some_modules_folder_name" in e_info_value + + def test_config_vars_not_a_dict(self): + with pytest.raises(Exception) as e_info: + BaseConfig.factory( + subcommand="deploy", + config_vars="a string", + config_file_path=Path("."), + ) + assert ( + "config_vars did not parse 
correctly, please check its configuration" + in str(e_info.value) + ) + + def test_config_vars_reserved_word(self): + with pytest.raises(Exception) as e_info: + BaseConfig.factory( + subcommand="deploy", + config_vars={"schemachange": "not allowed"}, + config_file_path=Path("."), + ) + assert ( + "The variable 'schemachange' has been reserved for use by schemachange, please use a different name" + in str(e_info.value) + ) + + def test_check_for_deploy_args_happy_path(self): + config = DeployConfig.factory( + snowflake_account="account", + snowflake_user="user", + snowflake_role="role", + snowflake_warehouse="warehouse", + config_file_path=Path("."), + ) + config.check_for_deploy_args() + + def test_check_for_deploy_args_exception(self): + config = DeployConfig.factory(config_file_path=Path(".")) + with pytest.raises(ValueError) as e: + config.check_for_deploy_args() + + assert "Missing config values. The following config values are required" in str( + e.value + ) + + +@mock.patch("pathlib.Path.is_file", return_value=False) +def test_render_config_invalid_path(_): + with pytest.raises(Exception) as e_info: + RenderConfig.factory(script_path="invalid path") + assert "invalid file path" in str(e_info) diff --git a/tests/config/test_get_merged_config.py b/tests/config/test_get_merged_config.py new file mode 100644 index 00000000..f2676a93 --- /dev/null +++ b/tests/config/test_get_merged_config.py @@ -0,0 +1,160 @@ +from pathlib import Path +from unittest import mock + +import pytest + +from schemachange.config.ChangeHistoryTable import ChangeHistoryTable +from schemachange.config.get_merged_config import get_merged_config + +required_args = [ + "--snowflake-account", + "account", + "--snowflake-user", + "user", + "--snowflake-warehouse", + "warehouse", + "--snowflake-role", + "role", +] + + +class TestGetMergedConfig: + @mock.patch("pathlib.Path.is_dir", return_value=True) + def test_default_config_folder(self, _): + with mock.patch("sys.argv", ["schemachange", *required_args]): + config = get_merged_config() + assert ( + config.config_file_path == Path(".") / config.default_config_file_name + ) + + @mock.patch("pathlib.Path.is_dir", return_value=True) + def test_config_folder(self, _): + with mock.patch( + "sys.argv", ["schemachange", "--config-folder", "DUMMY", *required_args] + ): + config = get_merged_config() + assert ( + config.config_file_path + == Path("DUMMY") / config.default_config_file_name + ) + + @mock.patch("pathlib.Path.is_dir", return_value=False) + def test_invalid_config_folder(self, _): + with pytest.raises(Exception) as e_info: + with mock.patch( + "sys.argv", ["schemachange", "--config-folder", "DUMMY", *required_args] + ): + config = get_merged_config() + assert ( + config.config_file_path + == Path("DUMMY") / config.default_config_file_name + ) + e_info_value = str(e_info.value) + assert "Path is not valid directory: DUMMY" in e_info_value + + @mock.patch("pathlib.Path.is_dir", return_value=True) + def test_no_cli_args(self, _): + with mock.patch( + "sys.argv", ["schemachange", "--config-folder", str(Path(__file__).parent)] + ): + config = get_merged_config() + + assert config.snowflake_account == "snowflake-account-from-yaml" + assert config.snowflake_user == "snowflake-user-from-yaml" + assert config.snowflake_warehouse == '"snowflake-warehouse-from-yaml"' + assert config.snowflake_role == '"snowflake-role-from-yaml"' + assert str(config.root_folder) == "root-folder-from-yaml" + assert str(config.modules_folder) == "modules-folder-from-yaml" + assert 
config.snowflake_database == '"snowflake-database-from-yaml"' + assert config.snowflake_schema == '"snowflake-schema-from-yaml"' + assert config.change_history_table == ChangeHistoryTable( + table_name='"change-history-table-from-yaml"', + schema_name="SCHEMACHANGE", + database_name="METADATA", + ) + assert config.config_vars == {"var1": "from_yaml", "var2": "also_from_yaml"} + assert config.create_change_history_table is False + assert config.autocommit is False + assert config.dry_run is False + assert config.query_tag == "query-tag-from-yaml" + assert config.oauth_config == { + "token-provider-url": "token-provider-url-from-yaml", + "token-response-name": "token-response-name-from-yaml", + "token-request-headers": { + "Content-Type": "Content-Type-from-yaml", + "User-Agent": "User-Agent-from-yaml", + }, + "token-request-payload": { + "client_id": "id-from-yaml", + "username": "username-from-yaml", + "password": "password-from-yaml", + "grant_type": "type-from-yaml", + "scope": "scope-from-yaml", + }, + } + + @mock.patch("pathlib.Path.is_dir", return_value=True) + def test_all_cli_args(self, _): + with mock.patch( + "sys.argv", + [ + "schemachange", + "--config-folder", + str(Path(__file__).parent), + "--root-folder", + "root-folder-from-cli", + "--modules-folder", + "modules-folder-from-cli", + "--vars", + '{"var1": "from_cli", "var3": "also_from_cli"}', + "--snowflake-account", + "snowflake-account-from-cli", + "--snowflake-user", + "snowflake-user-from-cli", + "--snowflake-role", + "snowflake-role-from-cli", + "--snowflake-warehouse", + "snowflake-warehouse-from-cli", + "--snowflake-database", + "snowflake-database-from-cli", + "--snowflake-schema", + "snowflake-schema-from-cli", + "--change-history-table", + "change-history-table-from-cli", + "--create-change-history-table", + "--autocommit", + "--dry-run", + "--query-tag", + "query-tag-from-cli", + "--oauth-config", + '{"token-provider-url": "https//...", "token-request-payload": {"client_id": "GUID_xyz"} }', + ], + ): + config = get_merged_config() + + assert config.snowflake_account == "snowflake-account-from-cli" + assert config.snowflake_user == "snowflake-user-from-cli" + assert config.snowflake_warehouse == '"snowflake-warehouse-from-cli"' + assert config.snowflake_role == '"snowflake-role-from-cli"' + assert str(config.root_folder) == "root-folder-from-cli" + assert str(config.modules_folder) == "modules-folder-from-cli" + assert config.snowflake_database == '"snowflake-database-from-cli"' + assert config.snowflake_schema == '"snowflake-schema-from-cli"' + assert config.change_history_table == ChangeHistoryTable( + table_name='"change-history-table-from-cli"', + schema_name="SCHEMACHANGE", + database_name="METADATA", + ) + assert config.config_vars == { + "var1": "from_cli", + "var2": "also_from_yaml", + "var3": "also_from_cli", + } + assert config.create_change_history_table is True + assert config.autocommit is True + assert config.dry_run is True + assert config.query_tag == "query-tag-from-cli" + assert config.oauth_config == { + "token-provider-url": "https//...", + "token-request-payload": {"client_id": "GUID_xyz"}, + } diff --git a/tests/config/test_get_yaml_config.py b/tests/config/test_get_yaml_config.py new file mode 100644 index 00000000..9748fff9 --- /dev/null +++ b/tests/config/test_get_yaml_config.py @@ -0,0 +1,111 @@ +from __future__ import annotations + +import os +import unittest.mock as mock +from pathlib import Path + +import pytest + +from schemachange.config.get_merged_config import get_yaml_config_kwargs 
+from schemachange.config.utils import load_yaml_config + + +# Note Parameters in config file are kebab case and are re-rendered as snake case after +# 'load_yaml_config' is called + + +def test_load_yaml_config__simple_config_file(tmp_path: Path): + config_contents = """ +config-version: 1 +root-folder: scripts +modules-folder: modules +vars: + database_name: SCHEMACHANGE_DEMO_JINJA +""" + config_file = tmp_path / "schemachange-config.yml" + config_file.write_text(config_contents) + + # noinspection PyTypeChecker + config: dict[str, dict[str, str]] = load_yaml_config(config_file) + + assert config["config-version"] == 1 + assert config["root-folder"] == "scripts" + assert config["modules-folder"] == "modules" + assert config["vars"]["database_name"] == "SCHEMACHANGE_DEMO_JINJA" + + +@mock.patch.dict(os.environ, {"TEST_VAR": "env_value"}) +def test_load_yaml_config__with_env_var_should_populate_value( + tmp_path: Path, +): + config_contents = """ +config-version: 1.1 +root-folder: {{env_var('TEST_VAR')}} +modules-folder: modules +vars: + database_name: SCHEMACHANGE_DEMO_JINJA +""" + config_file = tmp_path / "schemachange-config.yml" + config_file.write_text(config_contents) + + config = load_yaml_config(config_file) + + assert config["root-folder"] == "env_value" + + +def test_load_yaml_config__requiring_env_var_but_env_var_not_set_should_raise_exception( + tmp_path: Path, +): + config_contents = """ +config-version: 1.1 +root-folder: {{env_var('TEST_VAR')}} +modules-folder: modules +vars: + database_name: SCHEMACHANGE_DEMO_JINJA +""" + config_file = tmp_path / "schemachange-config.yml" + config_file.write_text(config_contents) + + with pytest.raises(ValueError) as e: + load_yaml_config(config_file) + assert ( + str(e.value) + == "Could not find environmental variable TEST_VAR and no default value was provided" + ) + + +@mock.patch("pathlib.Path.is_dir", return_value=True) +def test_get_yaml_config(_): + config_file_path = Path(__file__).parent / "schemachange-config.yml" + yaml_config = get_yaml_config_kwargs(config_file_path=config_file_path) + assert str(yaml_config["root_folder"]) == "root-folder-from-yaml" + assert str(yaml_config["modules_folder"]) == "modules-folder-from-yaml" + assert yaml_config["snowflake_account"] == "snowflake-account-from-yaml" + assert yaml_config["snowflake_user"] == "snowflake-user-from-yaml" + assert yaml_config["snowflake_role"] == "snowflake-role-from-yaml" + assert yaml_config["snowflake_warehouse"] == "snowflake-warehouse-from-yaml" + assert yaml_config["snowflake_database"] == "snowflake-database-from-yaml" + assert yaml_config["snowflake_schema"] == "snowflake-schema-from-yaml" + assert yaml_config["change_history_table"] == "change-history-table-from-yaml" + assert yaml_config["query_tag"] == "query-tag-from-yaml" + + assert yaml_config["create_change_history_table"] is False + assert yaml_config["autocommit"] is False + assert yaml_config["dry_run"] is False + + assert yaml_config["config_vars"] == {"var1": "from_yaml", "var2": "also_from_yaml"} + assert yaml_config["oauth_config"] == { + "token-provider-url": "token-provider-url-from-yaml", + "token-request-headers": { + "Content-Type": "Content-Type-from-yaml", + "User-Agent": "User-Agent-from-yaml", + }, + "token-request-payload": { + "client_id": "id-from-yaml", + "grant_type": "type-from-yaml", + "password": "password-from-yaml", + "scope": "scope-from-yaml", + "username": "username-from-yaml", + }, + "token-response-name": "token-response-name-from-yaml", + } diff --git 
a/tests/config/test_parse_cli_args.py b/tests/config/test_parse_cli_args.py new file mode 100644 index 00000000..31e5fa73 --- /dev/null +++ b/tests/config/test_parse_cli_args.py @@ -0,0 +1,123 @@ +from __future__ import annotations + +import json + +from schemachange.config.parse_cli_args import parse_cli_args + + +def test_parse_args_defaults(): + args: list[str] = [] + test_args = [("--config-folder", None, ".")] + expected: dict[str, str | int | None] = {} + for arg, value, expected_value in test_args: + if value: + args.extend([arg, value]) + expected_arg = arg.strip("-").replace("-", "_") + expected[expected_arg] = expected_value + + parsed_args = parse_cli_args(args) + for expected_arg, expected_value in expected.items(): + assert parsed_args[expected_arg] == expected_value + assert parsed_args["create_change_history_table"] is None + assert parsed_args["autocommit"] is None + assert parsed_args["dry_run"] is None + assert parsed_args["subcommand"] == "deploy" + + +def test_parse_args_deploy_names(): + args: list[str] = ["deploy"] + expected: dict[str, str | int] = {} + + valued_test_args: list[tuple[str, str, str]] = [ + ("--config-folder", "some_config_folder_name", "some_config_folder_name"), + ("--root-folder", "some_root_folder_name", "some_root_folder_name"), + ("--modules-folder", "some_modules_folder_name", "some_modules_folder_name"), + ("--vars", json.dumps({"some": "vars"}), {"some": "vars"}), + ("--snowflake-account", "some_snowflake_account", "some_snowflake_account"), + ("--snowflake-user", "some_snowflake_user", "some_snowflake_user"), + ("--snowflake-role", "some_snowflake_role", "some_snowflake_role"), + ( + "--snowflake-warehouse", + "some_snowflake_warehouse", + "some_snowflake_warehouse", + ), + ("--snowflake-database", "some_snowflake_database", "some_snowflake_database"), + ("--snowflake-schema", "some_snowflake_schema", "some_snowflake_schema"), + ("--change-history-table", "some_history_table", "some_history_table"), + ("--query-tag", "some_query_tag", "some_query_tag"), + ("--oauth-config", json.dumps({"some": "values"}), {"some": "values"}), + ] + + for arg, value, expected_value in valued_test_args: + if value: + args.extend([arg, value]) + expected_arg = arg.strip("-").replace("-", "_") + expected[expected_arg] = expected_value + + valueless_test_args: list[tuple[str, bool]] = [ + ("--create-change-history-table", True), + ("--autocommit", True), + ("--dry-run", True), + ] + + for arg, expected_value in valueless_test_args: + args.extend([arg]) + expected_arg = arg.strip("-").replace("-", "_") + expected[expected_arg] = expected_value + + parsed_args = parse_cli_args(args) + assert parsed_args["subcommand"] == "deploy" + for expected_arg, expected_value in expected.items(): + if expected_arg == "vars": + expected_arg = "config_vars" + assert parsed_args[expected_arg] == expected_value + + +def test_parse_args_deploy_flags(): + args: list[str] = ["deploy"] + expected: dict[str, str | int] = {} + + valued_test_args: list[tuple[str, str, str, str]] = [ + ("-f", "root_folder", "some_root_folder_name", "some_root_folder_name"), + ( + "-m", + "modules_folder", + "some_modules_folder_name", + "some_modules_folder_name", + ), + ("-a", "snowflake_account", "some_snowflake_account", "some_snowflake_account"), + ("-u", "snowflake_user", "some_snowflake_user", "some_snowflake_user"), + ("-r", "snowflake_role", "some_snowflake_role", "some_snowflake_role"), + ( + "-w", + "snowflake_warehouse", + "some_snowflake_warehouse", + "some_snowflake_warehouse", + ), + ( + "-d", 
+ "snowflake_database", + "some_snowflake_database", + "some_snowflake_database", + ), + ("-s", "snowflake_schema", "some_snowflake_schema", "some_snowflake_schema"), + ("-c", "change_history_table", "some_history_table", "some_history_table"), + ] + + for arg, expected_arg, value, expected_value in valued_test_args: + if value: + args.extend([arg, value]) + expected[expected_arg] = expected_value + + valueless_test_args: list[tuple[str, str, bool]] = [ + ("-ac", "autocommit", True), + ] + + for arg, expected_arg, expected_value in valueless_test_args: + args.extend([arg]) + expected[expected_arg] = expected_value + + parsed_args = parse_cli_args(args) + assert parsed_args["subcommand"] == "deploy" + for expected_arg, expected_value in expected.items(): + assert parsed_args[expected_arg] == expected_value diff --git a/tests/session/__init__.py b/tests/session/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/session/test_Credential.py b/tests/session/test_Credential.py new file mode 100644 index 00000000..6239645f --- /dev/null +++ b/tests/session/test_Credential.py @@ -0,0 +1,109 @@ +from __future__ import annotations + +import json +import os +from unittest import mock +from unittest.mock import MagicMock + +import pytest +import structlog + +from schemachange.session.Credential import ( + credential_factory, + PasswordCredential, + ExternalBrowserCredential, + OktaCredential, + PrivateKeyCredential, + OauthCredential, +) + + +# noinspection PyTypeChecker +@pytest.mark.parametrize( + "env_vars, oauth_config, expected", + [ + ( + {"SNOWFLAKE_PASSWORD": "my-password"}, + None, + PasswordCredential(password="my-password"), + ), + ( + { + "SNOWFLAKE_PASSWORD": "my-password", + "SNOWFLAKE_AUTHENTICATOR": "snowflake", + }, + None, + PasswordCredential(password="my-password"), + ), + ( + { + "SNOWFLAKE_AUTHENTICATOR": "oauth", + }, + { + "token-provider-url": "token-provider-url-from-yaml", + "token-response-name": "token-response-name-from-yaml", + "token-request-headers": { + "Content-Type": "Content-Type-from-yaml", + "User-Agent": "User-Agent-from-yaml", + }, + "token-request-payload": { + "client_id": "id-from-yaml", + "username": "username-from-yaml", + "password": "password-from-yaml", + "grant_type": "type-from-yaml", + "scope": "scope-from-yaml", + }, + }, + OauthCredential(token="my-token"), + ), + ( + { + "SNOWFLAKE_AUTHENTICATOR": "externalbrowser", + }, + None, + ExternalBrowserCredential(), + ), + ( + { + "SNOWFLAKE_AUTHENTICATOR": "https://someurl.com", + "SNOWFLAKE_PASSWORD": "my-password", + }, + None, + OktaCredential(authenticator="https://someurl.com", password="my-password"), + ), + ( + { + "SNOWFLAKE_PRIVATE_KEY_PATH": "some_path", + "SNOWFLAKE_AUTHENTICATOR": "snowflake", + }, + None, + PrivateKeyCredential(private_key="some_path"), + ), + ], +) +@mock.patch( + "schemachange.session.Credential.get_private_key_bytes", + return_value="some_path", +) +@mock.patch("requests.post") +def test_credential_factory( + mock_post, _, env_vars: dict, oauth_config: dict | None, expected: str +): + mock_response = MagicMock() + mock_response.text = json.dumps({"token-response-name-from-yaml": "my-token"}) + mock_post.return_value = mock_response + logger = structlog.testing.CapturingLogger() + + with mock.patch.dict(os.environ, env_vars, clear=True): + # noinspection PyTypeChecker + result = credential_factory(oauth_config=oauth_config, logger=logger) + assert result == expected + + +@pytest.mark.parametrize("env_vars", [{}]) +def 
test_credential_factory_unhandled(env_vars):
+    logger = structlog.testing.CapturingLogger()
+    with pytest.raises(NameError):
+        with mock.patch.dict(os.environ, env_vars, clear=True):
+            # noinspection PyTypeChecker
+            credential_factory(logger=logger)
diff --git a/tests/session/test_Script.py b/tests/session/test_Script.py
new file mode 100644
index 00000000..41184606
--- /dev/null
+++ b/tests/session/test_Script.py
@@ -0,0 +1,410 @@
+from __future__ import annotations
+
+from pathlib import Path
+from unittest import mock
+
+import pytest
+
+from schemachange.session.Script import (
+    Script,
+    VersionedScript,
+    RepeatableScript,
+    AlwaysScript,
+    script_factory,
+    get_all_scripts_recursively,
+)
+
+
+class TestScript:
+    @pytest.mark.parametrize(
+        "file_path, expected",
+        [
+            (Path("nested/file/V123__something.sql.jinja"), "V123__something.sql"),
+            (Path("nested/file/R__something.sql.jinja"), "R__something.sql"),
+            (Path("nested/file/A__something.sql.jinja"), "A__something.sql"),
+            (Path("nested/file/V123__something.sql"), "V123__something.sql"),
+            (Path("nested/file/R__something.sql"), "R__something.sql"),
+            (Path("nested/file/A__something.sql"), "A__something.sql"),
+        ],
+    )
+    def test_get_script_name(self, file_path: Path, expected: str):
+        result = Script.get_script_name(file_path)
+        assert result == expected
+
+    @pytest.mark.parametrize(
+        "file_path, expected",
+        [
+            (
+                Path("nested/file/V123__something.sql.jinja"),
+                VersionedScript(
+                    name="V123__something.sql",
+                    file_path=Path("nested/file/V123__something.sql.jinja"),
+                    description="Something",
+                    version="123",
+                ),
+            ),
+            (
+                Path("nested/file/R__something.sql.jinja"),
+                RepeatableScript(
+                    name="R__something.sql",
+                    file_path=Path("nested/file/R__something.sql.jinja"),
+                    description="Something",
+                ),
+            ),
+            (
+                Path("nested/file/A__something.sql.jinja"),
+                AlwaysScript(
+                    name="A__something.sql",
+                    file_path=Path("nested/file/A__something.sql.jinja"),
+                    description="Something",
+                ),
+            ),
+            (
+                Path("nested/file/V123__something.sql"),
+                VersionedScript(
+                    name="V123__something.sql",
+                    file_path=Path("nested/file/V123__something.sql"),
+                    description="Something",
+                    version="123",
+                ),
+            ),
+            (
+                Path("nested/file/R__something.sql"),
+                RepeatableScript(
+                    name="R__something.sql",
+                    file_path=Path("nested/file/R__something.sql"),
+                    description="Something",
+                ),
+            ),
+            (
+                Path("nested/file/A__something.sql"),
+                AlwaysScript(
+                    name="A__something.sql",
+                    file_path=Path("nested/file/A__something.sql"),
+                    description="Something",
+                ),
+            ),
+            (Path("nested/file/something.sql"), None),
+            (Path("nested/file/something.sql.jinja"), None),
+            (
+                Path("nested/file/A__a_longer_name.sql"),
+                AlwaysScript(
+                    name="A__a_longer_name.sql",
+                    file_path=Path("nested/file/A__a_longer_name.sql"),
+                    description="A longer name",
+                ),
+            ),
+        ],
+    )
+    def test_script_factory(self, file_path: Path, expected: Script):
+        result = script_factory(file_path)
+        assert result == expected
+
+
+class TestGetAllScriptsRecursively:
+    def test_given_empty_folder_should_return_empty(self):
+        with mock.patch("pathlib.Path.rglob") as mock_rglob:
+            mock_rglob.side_effect = [[], []]
+            root_directory = Path("scripts")
+            result = get_all_scripts_recursively(root_directory)
+
+        assert result == dict()
+
+    def test_given_just_non_change_files_should_return_empty(self):
+        with mock.patch("pathlib.Path.rglob") as mock_rglob:
+            mock_rglob.side_effect = [
+                [
+                    Path("README.txt"),
+                    Path("subfolder") / 
"subfolder2" / "something.sql", + Path("subfolder") / "subfolder2" / "testing.py", + ], + [], + ] + result = get_all_scripts_recursively(Path("scripts")) + + assert result == dict() + + ############################ + #### Version file tests #### + ############################ + + def test_version_number_regex_numeric_happy_path(self): + with mock.patch("pathlib.Path.rglob") as mock_rglob: + mock_rglob.side_effect = [ + [ + Path("V1.1.1__initial.sql"), + Path("subfolder") / "V1.1.2__update.SQL", + Path("subfolder") / "subfolder2" / "V1.1.3__update.sql", + ], + [], + ] + + result = get_all_scripts_recursively(Path("scripts")) + + assert len(result) == 3 + assert "v1.1.1__initial.sql" in result + assert "v1.1.2__update.sql" in result + assert "v1.1.3__update.sql" in result + + def test_version_number_regex_text_happy_path(self): + with mock.patch("pathlib.Path.rglob") as mock_rglob: + mock_rglob.side_effect = [ + [ + Path("Va.b.c__initial.sql"), + ], + [], + ] + result = get_all_scripts_recursively(Path("scripts")) + assert len(result) == 1 + assert "va.b.c__initial.sql" in result + + def test_given_version_files_should_return_version_files(self): + with mock.patch("pathlib.Path.rglob") as mock_rglob: + mock_rglob.side_effect = [ + [ + Path("V1.1.1__initial.sql"), + Path("subfolder") / "V1.1.2__update.SQL", + Path("subfolder") / "subfolder2" / "V1.1.3__update.sql", + ], + [], + ] + + result = get_all_scripts_recursively(Path("scripts")) + + assert len(result) == 3 + assert "v1.1.1__initial.sql" in result + assert "v1.1.2__update.sql" in result + assert "v1.1.3__update.sql" in result + + def test_given_same_version_twice_should_raise_exception(self): + with mock.patch("pathlib.Path.rglob") as mock_rglob: + mock_rglob.side_effect = [ + [ + Path("V1.1.1__initial.sql"), + Path("subfolder") / "V1.1.1__update.sql", + Path("subfolder") / "subfolder2" / "V1.1.2__update.sql", + ], + [], + ] + with pytest.raises(ValueError) as e: + get_all_scripts_recursively(Path("scripts")) + assert str(e.value).startswith( + "The script version 1.1.1 exists more than once (second instance" + ) + + def test_given_single_version_file_should_extract_attributes(self): + with mock.patch("pathlib.Path.rglob") as mock_rglob: + mock_rglob.side_effect = [ + [Path("subfolder") / "V1.1.1.1__THIS_is_my_test.sql"], + [], + ] + result = get_all_scripts_recursively(Path("scripts")) + + assert len(result) == 1 + script = result["v1.1.1.1__this_is_my_test.sql"] + assert script.name == "V1.1.1.1__THIS_is_my_test.sql" + assert script.file_path == Path("subfolder") / "V1.1.1.1__THIS_is_my_test.sql" + assert script.type == "V" + assert script.version == "1.1.1.1" + assert script.description == "This is my test" + + def test_given_single_version_jinja_file_should_extract_attributes(self): + with mock.patch("pathlib.Path.rglob") as mock_rglob: + mock_rglob.side_effect = [ + [Path("subfolder") / "V1.1.1.2__THIS_is_my_test.sql.jinja"], + [], + ] + result = get_all_scripts_recursively(Path("scripts")) + + assert len(result) == 1 + script = result["v1.1.1.2__this_is_my_test.sql"] + assert script.name == "V1.1.1.2__THIS_is_my_test.sql" + assert ( + script.file_path + == Path("subfolder") / "V1.1.1.2__THIS_is_my_test.sql.jinja" + ) + assert script.type == "V" + assert script.version == "1.1.1.2" + assert script.description == "This is my test" + + def test_given_same_version_file_with_and_without_jinja_extension_should_raise_exception( + self, + ): + with mock.patch("pathlib.Path.rglob") as mock_rglob: + mock_rglob.side_effect = [ + [ + 
Path("V1.1.1__initial.sql"), + Path("V1.1.1__initial.sql.jinja"), + ], + [], + ] + with pytest.raises(ValueError) as e: + get_all_scripts_recursively(Path("scripts")) + assert str(e.value).startswith( + "The script name V1.1.1__initial.sql exists more than once (first_instance" + ) + + ########################### + #### Always file tests #### + ########################### + + def test_given_always_files_should_return_always_files(self): + with mock.patch("pathlib.Path.rglob") as mock_rglob: + mock_rglob.side_effect = [ + [ + Path("A__proc1.sql"), + Path("subfolder") / "A__proc2.SQL", + Path("subfolder") / "subfolder2" / "A__proc3.sql", + ], + [], + ] + result = get_all_scripts_recursively(Path("scripts")) + + assert len(result) == 3 + assert "a__proc1.sql" in result + assert "a__proc2.sql" in result + assert "a__proc3.sql" in result + + def test_given_same_always_file_should_raise_exception(self): + with mock.patch("pathlib.Path.rglob") as mock_rglob: + mock_rglob.side_effect = [ + [ + Path("A__initial.sql"), + Path("subfolder") / "A__initial.sql", + Path("subfolder") / "subfolder2" / "A__proc3.sql", + ], + [], + ] + + with pytest.raises(ValueError) as e: + get_all_scripts_recursively(Path("scripts")) + assert str(e.value).startswith( + "The script name A__initial.sql exists more than once (first_instance " + ) + + def test_given_single_always_file_should_extract_attributes(self): + with mock.patch("pathlib.Path.rglob") as mock_rglob: + mock_rglob.side_effect = [ + [Path("subfolder") / "A__THIS_is_my_test.sql"], + [], + ] + result = get_all_scripts_recursively(Path("scripts")) + + assert len(result) == 1 + script = result["a__this_is_my_test.sql"] + assert script.name == "A__THIS_is_my_test.sql" + assert script.file_path == Path("subfolder") / "A__THIS_is_my_test.sql" + assert script.type == "A" + assert script.description == "This is my test" + + def test_given_single_always_jinja_file_should_extract_attributes(self): + with mock.patch("pathlib.Path.rglob") as mock_rglob: + mock_rglob.side_effect = [ + [Path("subfolder") / "A__THIS_is_my_test.sql.jinja"], + [], + ] + result = get_all_scripts_recursively(Path("scripts")) + + assert len(result) == 1 + script = result["a__this_is_my_test.sql"] + assert script.name == "A__THIS_is_my_test.sql" + assert script.file_path == Path("subfolder") / "A__THIS_is_my_test.sql.jinja" + assert script.type == "A" + assert script.description == "This is my test" + + def test_given_same_always_file_with_and_without_jinja_extension_should_raise_exception( + self, + ): + with mock.patch("pathlib.Path.rglob") as mock_rglob: + mock_rglob.side_effect = [ + [Path("A__initial.sql"), Path("A__initial.sql.jinja")], + [], + ] + + with pytest.raises(ValueError) as e: + get_all_scripts_recursively(Path("scripts")) + assert str(e.value).startswith( + "The script name A__initial.sql exists more than once (first_instance " + ) + + ############################### + #### Repeatable file tests #### + ############################### + + def test_given_repeatable_files_should_return_repeatable_files(self): + with mock.patch("pathlib.Path.rglob") as mock_rglob: + mock_rglob.side_effect = [ + [ + Path("R__proc1.sql"), + Path("subfolder") / "R__proc2.SQL", + Path("subfolder") / "subfolder2" / "R__proc3.sql", + ], + [], + ] + result = get_all_scripts_recursively(Path("scripts")) + + assert len(result) == 3 + assert "r__proc1.sql" in result + assert "r__proc2.sql" in result + assert "r__proc3.sql" in result + + def test_given_same_repeatable_file_should_raise_exception(self): + with 
mock.patch("pathlib.Path.rglob") as mock_rglob: + mock_rglob.side_effect = [ + [ + Path("R__initial.sql"), + Path("subfolder") / "R__initial.SQL", + ], + [], + ] + with pytest.raises(ValueError) as e: + get_all_scripts_recursively(Path("scripts")) + assert str(e.value).startswith( + "The script name R__initial.SQL exists more than once (first_instance " + ) + + def test_given_single_repeatable_file_should_extract_attributes(self): + with mock.patch("pathlib.Path.rglob") as mock_rglob: + mock_rglob.side_effect = [ + [Path("subfolder") / "R__THIS_is_my_test.sql"], + [], + ] + result = get_all_scripts_recursively(Path("scripts")) + + assert len(result) == 1 + script = result["r__this_is_my_test.sql"] + assert script.name == "R__THIS_is_my_test.sql" + assert script.file_path == Path("subfolder") / "R__THIS_is_my_test.sql" + assert script.type == "R" + assert script.description == "This is my test" + + def test_given_single_repeatable_jinja_file_should_extract_attributes(self): + with mock.patch("pathlib.Path.rglob") as mock_rglob: + mock_rglob.side_effect = [ + [Path("subfolder") / "R__THIS_is_my_test.sql.jinja"], + [], + ] + result = get_all_scripts_recursively(Path("scripts")) + + assert len(result) == 1 + script = result["r__this_is_my_test.sql"] + assert script.name == "R__THIS_is_my_test.sql" + assert script.file_path == Path("subfolder") / "R__THIS_is_my_test.sql.jinja" + assert script.type == "R" + assert script.description == "This is my test" + + def test_given_same_repeatable_file_with_and_without_jinja_extension_should_raise_exception( + self, + ): + with mock.patch("pathlib.Path.rglob") as mock_rglob: + mock_rglob.side_effect = [ + [Path("R__initial.sql"), Path("R__initial.sql.jinja")], + [], + ] + with pytest.raises(ValueError) as e: + get_all_scripts_recursively(Path("scripts")) + assert str(e.value).startswith( + "The script name R__initial.sql exists more than once (first_instance " + ) diff --git a/tests/session/test_SnowflakeSession.py b/tests/session/test_SnowflakeSession.py new file mode 100644 index 00000000..68f308a5 --- /dev/null +++ b/tests/session/test_SnowflakeSession.py @@ -0,0 +1,49 @@ +from __future__ import annotations + +from unittest import mock + +import pytest +import structlog + +from schemachange.config.ChangeHistoryTable import ChangeHistoryTable +from schemachange.session.Credential import ExternalBrowserCredential +from schemachange.session.SnowflakeSession import SnowflakeSession + + +@pytest.fixture +def session() -> SnowflakeSession: + credential = ExternalBrowserCredential(password="password") + change_history_table = ChangeHistoryTable() + logger = structlog.testing.CapturingLogger() + + with mock.patch("snowflake.connector.connect"): + # noinspection PyTypeChecker + return SnowflakeSession( + snowflake_user="user", + snowflake_account="account", + snowflake_role="role", + snowflake_warehouse="warehouse", + schemachange_version="3.6.1.dev", + application="schemachange", + credential=credential, + change_history_table=change_history_table, + logger=logger, + ) + + +class TestSnowflakeSession: + def test_fetch_change_history_metadata_exists(self, session: SnowflakeSession): + session.con.execute_string.return_value = [[["created", "last_altered"]]] + result = session.fetch_change_history_metadata() + assert result == {"created": "created", "last_altered": "last_altered"} + assert session.con.execute_string.call_count == 1 + assert session.logger.calls[0][1][0] == "Executing query" + + def test_fetch_change_history_metadata_does_not_exist( + self, 
session: SnowflakeSession + ): + session.con.execute_string.return_value = [[]] + result = session.fetch_change_history_metadata() + assert result == {} + assert session.con.execute_string.call_count == 1 + assert session.logger.calls[0][1][0] == "Executing query" diff --git a/tests/session/test_utils.py b/tests/session/test_utils.py new file mode 100644 index 00000000..a3077b52 --- /dev/null +++ b/tests/session/test_utils.py @@ -0,0 +1,43 @@ +from __future__ import annotations + +import os +from unittest import mock + +import pytest + +from schemachange.session.utils import ( + get_snowflake_password, + get_private_key_password, +) + + +@pytest.mark.parametrize( + "env_vars, expected", + [ + ({"SNOWFLAKE_PASSWORD": "my-password"}, "my-password"), + ({"SNOWFLAKE_PASSWORD": ""}, None), + ({}, None), + ({"SNOWSQL_PWD": "my-password"}, "my-password"), + ( + {"SNOWSQL_PWD": "my-password", "SNOWFLAKE_PASSWORD": "my-password"}, + "my-password", + ), + ], +) +def test_get_snowflake_password(env_vars: dict, expected: str): + with mock.patch.dict(os.environ, env_vars, clear=True): + result = get_snowflake_password() + assert result == expected + + +@pytest.mark.parametrize( + "env_vars, expected", + [ + ({"SNOWFLAKE_PRIVATE_KEY_PASSPHRASE": "my-passphrase"}, b"my-passphrase"), + ({}, None), + ], +) +def test_get_private_key_password(env_vars: dict, expected: str): + with mock.patch.dict(os.environ, env_vars, clear=True): + result = get_private_key_password() + assert result == expected diff --git a/tests/test_JinjaEnvVar.py b/tests/test_JinjaEnvVar.py index 1e497d0f..7bb2750f 100644 --- a/tests/test_JinjaEnvVar.py +++ b/tests/test_JinjaEnvVar.py @@ -1,9 +1,12 @@ +from __future__ import annotations + import os import unittest.mock as mock + import jinja2 import pytest -from schemachange.cli import JinjaEnvVar +from schemachange.JinjaEnvVar import JinjaEnvVar @mock.patch.dict(os.environ, {}, clear=True) @@ -20,7 +23,6 @@ def test_env_var_with_no_default_and_no_environmental_variables_should_raise_exc @mock.patch.dict(os.environ, {}, clear=True) def test_env_var_with_default_and_no_environmental_variables_should_return_default(): - print(os.environ) assert ("SF_DATABASE" in os.environ) is False result = JinjaEnvVar.env_var("SF_DATABASE", "SCHEMACHANGE_DEMO") @@ -34,7 +36,7 @@ def test_env_var_with_default_and_environmental_variables_should_return_environm @mock.patch.dict(os.environ, {"SF_DATABASE": "SCHEMACHANGE_DEMO_3"}, clear=True) -def test_JinjaEnvVar_with_jinja_template(): +def test_jinjaenvvar_with_jinja_template(): template = jinja2.Template( "{{env_var('SF_DATABASE', 'SCHEMACHANGE_DEMO')}}", extensions=[JinjaEnvVar] ) diff --git a/tests/test_JinjaTemplateProcessor.py b/tests/test_JinjaTemplateProcessor.py index 5297cb5a..3ae9bc95 100644 --- a/tests/test_JinjaTemplateProcessor.py +++ b/tests/test_JinjaTemplateProcessor.py @@ -1,63 +1,105 @@ +from __future__ import annotations + import json +import os import pathlib import pytest from jinja2 import DictLoader from jinja2.exceptions import UndefinedError -from schemachange.cli import JinjaTemplateProcessor +from schemachange.JinjaTemplateProcessor import JinjaTemplateProcessor + + +@pytest.fixture() +def processor() -> JinjaTemplateProcessor: + return JinjaTemplateProcessor(pathlib.Path("."), None) + + +class TestJinjaTemplateProcessor: + def test_render_simple_string(self, processor: JinjaTemplateProcessor): + # override the default loader + templates = {"test.sql": "some text"} + processor.override_loader(DictLoader(templates)) + + context = 
processor.render("test.sql", None)
+
+        assert context == "some text"
+
+    def test_render_simple_string_expecting_variable_that_does_not_exist_should_raise_exception(
+        self, processor: JinjaTemplateProcessor
+    ):
+        # override the default loader
+        templates = {"test.sql": "some text {{ myvar }}"}
+        processor.override_loader(DictLoader(templates))
+
+        with pytest.raises(UndefinedError) as e:
+            processor.render("test.sql", None)

-def test_JinjaTemplateProcessor_render_simple_string():
-    processor = JinjaTemplateProcessor("", None)
+        assert str(e.value) == "'myvar' is undefined"

-    # overide the default loader
-    templates = {"test.sql": "some text"}
-    processor.override_loader(DictLoader(templates))
+    def test_render_simple_string_expecting_variable(
+        self, processor: JinjaTemplateProcessor
+    ):
+        # override the default loader
+        templates = {"test.sql": "Hello {{ myvar }}!"}
+        processor.override_loader(DictLoader(templates))

-    context = processor.render("test.sql", None, True)
+        variables = json.loads('{"myvar" : "world"}')

-    assert context == "some text"
+        context = processor.render("test.sql", variables)

+        assert context == "Hello world!"

-def test_JinjaTemplateProcessor_render_simple_string_expecting_variable_that_does_not_exist_should_raise_exception():
-    processor = JinjaTemplateProcessor("", None)
+    def test_render_from_subfolder(self, tmp_path: pathlib.Path):
+        root_folder = tmp_path / "MORE2"

-    # overide the default loader
-    templates = {"test.sql": "some text {{ myvar }}"}
-    processor.override_loader(DictLoader(templates))
+        root_folder.mkdir()
+        script_folder = root_folder / "SQL"
+        script_folder.mkdir()
+        script_file = script_folder / "1.0.0_my_test.sql"
+        script_file.write_text("Hello world!")

-    with pytest.raises(UndefinedError) as e:
-        processor.render("test.sql", None, True)
+        processor = JinjaTemplateProcessor(root_folder, None)
+        template_path = processor.relpath(script_file)

-    assert str(e.value) == "'myvar' is undefined"
+        context = processor.render(template_path, {})

+        assert context == "Hello world!"

-def test_JinjaTemplateProcessor_render_simple_string_expecting_variable():
-    processor = JinjaTemplateProcessor("", None)
+    def test_from_environ_not_set(self, processor: JinjaTemplateProcessor):
+        # override the default loader
+        templates = {"test.sql": "some text {{ env_var('MYVAR') }}"}
+        processor.override_loader(DictLoader(templates))

-    # overide the default loader
-    templates = {"test.sql": "Hello {{ myvar }}!"}
-    processor.override_loader(DictLoader(templates))
+        with pytest.raises(ValueError) as e:
+            processor.render("test.sql", None)

-    vars = json.loads('{"myvar" : "world"}')
+        assert (
+            str(e.value)
+            == "Could not find environmental variable MYVAR and no default value was provided"
+        )

-    context = processor.render("test.sql", vars, True)
+    def test_from_environ_set(self, processor: JinjaTemplateProcessor):
+        # set MYVAR env variable
+        os.environ["MYVAR"] = "myvar_from_environment"

-    assert context == "Hello world!"
+        # override the default loader
+        templates = {"test.sql": "some text {{ env_var('MYVAR') }}"}
+        processor.override_loader(DictLoader(templates))
+        context = processor.render("test.sql", None)

-def test_JinjaTemplateProcessor_render_from_subfolder(tmp_path: pathlib.Path):
-    root_folder = tmp_path / "MORE2"
+        # unset MYVAR env variable
+        del os.environ["MYVAR"]

-    root_folder.mkdir()
-    script_folder = root_folder / "SQL"
-    script_folder.mkdir()
-    script_file = script_folder / "1.0.0_my_test.sql"
-    script_file.write_text("Hello world!")
+        assert context == "some text myvar_from_environment"

-    processor = JinjaTemplateProcessor(str(root_folder), None)
-    template_path = processor.relpath(str(script_file))
+    def test_from_environ_not_set_default(self, processor: JinjaTemplateProcessor):
+        # override the default loader
+        templates = {"test.sql": "some text {{ env_var('MYVAR', 'myvar_default') }}"}
+        processor.override_loader(DictLoader(templates))

-    context = processor.render(template_path, {}, True)
+        context = processor.render("test.sql", None)

-    assert context == "Hello world!"
+        assert context == "some text myvar_default"
diff --git a/tests/test_SecretManager.py b/tests/test_SecretManager.py
deleted file mode 100644
index d1312034..00000000
--- a/tests/test_SecretManager.py
+++ /dev/null
@@ -1,85 +0,0 @@
-from schemachange.cli import SecretManager
-
-
-def test_SecretManager_given_no_secrets_when_redact_then_return_original_value():
-    sm = SecretManager()
-    result = sm.redact("My string")
-    assert result == "My string"
-
-
-def test_SecretManager_given_secrets_when_redact_on_none_then_return_none():
-    sm = SecretManager()
-    sm.add("world")
-    result = sm.redact(None)
-    assert result is None
-
-
-def test_SecretManager_given_secrets_when_redact_then_return_redacted_value():
-    sm = SecretManager()
-    sm.add("world")
-    result = sm.redact("Hello world!")
-    assert result == "Hello *****!"
-
-
-def test_SecretManager_given_secrets_when_clear_then_should_hold_zero_secrets():
-    sm = SecretManager()
-    sm.add("world")
-    sm.add("Hello")
-
-    # check private variable
-    assert len(sm._SecretManager__secrets) == 2
-
-    sm.clear()
-
-    # check private variable
-    assert len(sm._SecretManager__secrets) == 0
-
-
-def test_SecretManager_given_one_secrets_when_add_range_with_None_then_Count_should_remain_one():
-    sm = SecretManager()
-    sm.add("world")
-    sm.add_range(None)
-
-    assert len(sm._SecretManager__secrets) == 1
-
-
-def test_SecretManager_given_one_secrets_when_add_range_with_empty_set_then_Count_should_remain_one():
-    sm = SecretManager()
-    sm.add("world")
-
-    range = set()
-    sm.add_range(range)
-
-    assert len(sm._SecretManager__secrets) == 1
-
-
-def test_SecretManager_given_one_secrets_when_add_range_with_two_secrets_then_count_of_secrets_three():
-    sm = SecretManager()
-    sm.add("world")
-
-    range = {"one", "two"}
-    sm.add_range(range)
-
-    # check private variable
-    assert len(sm._SecretManager__secrets) == 3
-    assert "world" in sm._SecretManager__secrets
-    assert "one" in sm._SecretManager__secrets
-    assert "two" in sm._SecretManager__secrets
-
-
-# test static methods
-
-
-def test_SecretManager_check_global_assignment_round_trip():
-    sm = SecretManager()
-
-    SecretManager.set_global_manager(sm)
-    assert SecretManager.get_global_manager() is sm
-
-
-def test_SecretManager_global_redact():
-    sm = SecretManager()
-    sm.add("Hello")
-    SecretManager.set_global_manager(sm)
-
-    assert SecretManager.global_redact("Hello World!") == "***** World!"
diff --git a/tests/test_cli_misc.py b/tests/test_cli_misc.py index 3b24bd88..d5c0ef87 100644 --- a/tests/test_cli_misc.py +++ b/tests/test_cli_misc.py @@ -1,41 +1,48 @@ -import schemachange.cli +from dataclasses import asdict + import pytest +from schemachange.cli import SCHEMACHANGE_VERSION, SNOWFLAKE_APPLICATION_NAME +from schemachange.config.BaseConfig import BaseConfig +from schemachange.config.ChangeHistoryTable import ChangeHistoryTable +from schemachange.config.utils import get_snowflake_identifier_string +from schemachange.deploy import alphanum_convert, get_alphanum_key, sorted_alphanumeric + def test_cli_given__schemachange_version_change_updated_in_setup_config_file(): - assert schemachange.cli._schemachange_version == "3.7.0" + assert SCHEMACHANGE_VERSION == "3.7.0" def test_cli_given__constants_exist(): - assert schemachange.cli._config_file_name == "schemachange-config.yml" - assert schemachange.cli._metadata_database_name == "METADATA" - assert schemachange.cli._metadata_schema_name == "SCHEMACHANGE" - assert schemachange.cli._metadata_table_name == "CHANGE_HISTORY" - assert schemachange.cli._snowflake_application_name == "schemachange" + assert BaseConfig.default_config_file_name == "schemachange-config.yml" + assert ChangeHistoryTable._default_database_name == "METADATA" + assert ChangeHistoryTable._default_schema_name == "SCHEMACHANGE" + assert ChangeHistoryTable._default_table_name == "CHANGE_HISTORY" + assert SNOWFLAKE_APPLICATION_NAME == "schemachange" def test_alphanum_convert_given__integer(): - assert schemachange.cli.alphanum_convert("123") == 123 + assert alphanum_convert("123") == 123 def test_alphanum_convert_given__lowercase(): - assert schemachange.cli.alphanum_convert("TEST") == "test" + assert alphanum_convert("TEST") == "test" def test_get_alphanum_key_given__empty_string(): - assert schemachange.cli.get_alphanum_key("") == [""] + assert get_alphanum_key("") == [""] def test_get_alphanum_key_given__numbers_only(): - assert schemachange.cli.get_alphanum_key("123") == ["", 123, ""] + assert get_alphanum_key("123") == ["", 123, ""] def test_get_alphanum_key_given__alphabets_only(): - assert schemachange.cli.get_alphanum_key("abc") == ["abc"] + assert get_alphanum_key("abc") == ["abc"] def test_get_alphanum_key_given__upper_alphanumeric(): - assert schemachange.cli.get_alphanum_key("V1.2.3__") == [ + assert get_alphanum_key("V1.2.3__") == [ "v", 1, ".", @@ -47,13 +54,14 @@ def test_get_alphanum_key_given__upper_alphanumeric(): def test_get_alphanum_key_given__valid_version_string(): - assert schemachange.cli.get_alphanum_key("1.2.2") == ["", 1, ".", 2, ".", 2, ""] + assert get_alphanum_key("1.2.2") == ["", 1, ".", 2, ".", 2, ""] def test_sorted_alphanumeric_mixed_string(): - assert schemachange.cli.sorted_alphanumeric( - ["V1.2.3__file.sql", "V1.2.4__file.sql"] - ) == ["V1.2.3__file.sql", "V1.2.4__file.sql"] + assert sorted_alphanumeric(["V1.2.3__file.sql", "V1.2.4__file.sql"]) == [ + "V1.2.3__file.sql", + "V1.2.4__file.sql", + ] @pytest.mark.parametrize( @@ -62,47 +70,47 @@ def test_sorted_alphanumeric_mixed_string(): ( None, { - "database_name": schemachange.cli._metadata_database_name.upper(), - "schema_name": schemachange.cli._metadata_schema_name.upper(), - "table_name": schemachange.cli._metadata_table_name.upper(), + "database_name": ChangeHistoryTable._default_database_name, + "schema_name": ChangeHistoryTable._default_schema_name, + "table_name": ChangeHistoryTable._default_table_name, }, ), ( "change_history_table", { - "database_name": 
schemachange.cli._metadata_database_name.upper(), - "schema_name": schemachange.cli._metadata_schema_name.upper(), - "table_name": "change_history_table".upper(), + "database_name": ChangeHistoryTable._default_database_name, + "schema_name": ChangeHistoryTable._default_schema_name, + "table_name": "change_history_table", }, ), ( "myschema.change_history_table", { - "database_name": schemachange.cli._metadata_database_name.upper(), - "schema_name": "myschema".upper(), - "table_name": "change_history_table".upper(), + "database_name": ChangeHistoryTable._default_database_name, + "schema_name": "myschema", + "table_name": "change_history_table", }, ), ( "mydb.myschema.change_history_table", { - "database_name": "mydb".upper(), - "schema_name": "myschema".upper(), - "table_name": "change_history_table".upper(), + "database_name": "mydb", + "schema_name": "myschema", + "table_name": "change_history_table", }, ), ( '"change-history-table"', { - "database_name": schemachange.cli._metadata_database_name.upper(), - "schema_name": schemachange.cli._metadata_schema_name.upper(), + "database_name": ChangeHistoryTable._default_database_name, + "schema_name": ChangeHistoryTable._default_schema_name, "table_name": '"change-history-table"', }, ), ( '"my-schema"."change-history-table"', { - "database_name": schemachange.cli._metadata_database_name.upper(), + "database_name": ChangeHistoryTable._default_database_name, "schema_name": '"my-schema"', "table_name": '"change-history-table"', }, @@ -120,15 +128,13 @@ def test_sorted_alphanumeric_mixed_string(): def test_get_change_history_table_details_given__acceptable_values_produces_fully_qualified_change_history_table_name( cht, expected ): - assert schemachange.cli.get_change_history_table_details(cht) == expected + assert asdict(ChangeHistoryTable.from_str(cht)) == expected -@pytest.mark.parametrize( - "cht", [("fifth.fourth.third.two.one"), ("fourth.third.two.one")] -) +@pytest.mark.parametrize("cht", ["fifth.fourth.third.two.one", "fourth.third.two.one"]) def test_get_change_history_table_details_given__unacceptable_values_raises_error(cht): with pytest.raises(ValueError) as e: - schemachange.cli.get_change_history_table_details(cht) + ChangeHistoryTable.from_str(cht) assert str(e.value).startswith("Invalid change history table name: ") @@ -145,10 +151,7 @@ def test_get_change_history_table_details_given__unacceptable_values_raises_erro def test__get_snowflake_identifier_string_given__acceptable_values_produces_properly_quoted_snowflake_identifier( input_value, input_type, expected_value ): - assert ( - schemachange.cli.get_snowflake_identifier_string(input_value, input_type) - == expected_value - ) + assert get_snowflake_identifier_string(input_value, input_type) == expected_value @pytest.mark.parametrize( @@ -159,6 +162,6 @@ def test__get_snowflake_identifier_string_given__unacceptable_values_raises_erro input_value, input_type ): with pytest.raises(ValueError) as e: - schemachange.cli.get_snowflake_identifier_string(input_value, input_type) + get_snowflake_identifier_string(input_value, input_type) assert str(e.value).startswith(f"Invalid {input_type}: ") diff --git a/tests/test_extract_config_secrets.py b/tests/test_extract_config_secrets.py deleted file mode 100644 index 48571029..00000000 --- a/tests/test_extract_config_secrets.py +++ /dev/null @@ -1,86 +0,0 @@ -import pytest -from schemachange.cli import extract_config_secrets - - -def test_extract_config_secrets_given_empty_config_should_produce_empty_set(): - config = {} - assert 
len(extract_config_secrets(config)) == 0 - - -def test_extract_config_secrets_given_None_should_produce_empty_set(): - assert len(extract_config_secrets(None)) == 0 - - -@pytest.mark.parametrize( - "config, secret", - [ - ({"vars": {"secret": "secret_val1"}}, "secret_val1"), - ({"vars": {"SECret": "secret_val2"}}, "secret_val2"), - ({"vars": {"secret_key": "secret_val3"}}, "secret_val3"), - ({"vars": {"s3_bucket_secret": "secret_val4"}}, "secret_val4"), - ({"vars": {"s3SecretKey": "secret_val5"}}, "secret_val5"), - ({"vars": {"nested": {"s3_bucket_secret": "secret_val6"}}}, "secret_val6"), - ], -) -def test_extract_config_secrets_given__vars_with_keys_should_extract_secret( - config, secret -): - results = extract_config_secrets(config) - assert secret in results - - -def test_extract_config_secrets_given__vars_with_secrets_key_then_all_children_should_be_treated_as_secrets(): - config = { - "vars": { - "secrets": { - "database_name": "database_name_val", - "schema_name": "schema_name_val", - "nested_secrets": {"SEC_ONE": "SEC_ONE_VAL"}, - } - } - } - - results = extract_config_secrets(config) - - assert len(results) == 3 - assert "database_name_val" in results - assert "schema_name_val" in results - assert "SEC_ONE_VAL" in results - - -def test_extract_config_secrets_given__vars_with_nested_secrets_key_then_all_children_should_be_treated_as_secrets(): - config = { - "vars": { - "nested": { - "secrets": { - "database_name": "database_name_val", - "schema_name": "schema_name_val", - "nested": {"SEC_ONE": "SEC_ONE_VAL"}, - } - } - } - } - - results = extract_config_secrets(config) - - assert len(results) == 3 - assert "database_name_val" in results - assert "schema_name_val" in results - assert "SEC_ONE_VAL" in results - - -def test_extract_config_secrets_given__vars_with_same_secret_twice_then_only_extracted_once(): - config = { - "vars": { - "secrets": { - "database_name": "SECRET_VALUE", - "schema_name": "SECRET_VALUE", - "nested_secrets": {"SEC_ONE": "SECRET_VALUE"}, - } - } - } - - results = extract_config_secrets(config) - - assert len(results) == 1 - assert "SECRET_VALUE" in results diff --git a/tests/test_get_all_scripts_recursively.py b/tests/test_get_all_scripts_recursively.py deleted file mode 100644 index 88428b11..00000000 --- a/tests/test_get_all_scripts_recursively.py +++ /dev/null @@ -1,274 +0,0 @@ -import os -import unittest.mock as mock -import pytest -from schemachange.cli import get_all_scripts_recursively - - -# Generic tests - - -def test_get_all_scripts_recursively__given_empty_folder_should_return_empty(): - with mock.patch("os.walk") as mockwalk: - mockwalk.return_value = [] - result = get_all_scripts_recursively("scripts", False) - - assert result == dict() - - -def test_get_all_scripts_recursively__given_just_non_change_files_should_return_empty(): - with mock.patch("os.walk") as mockwalk: - mockwalk.return_value = [ - ("", ("subfolder"), ("README.txt",)), - ("subfolder", ("subfolder2"), ("something.sql",)), - (f"subfolder{os.sep}subfolder2", (""), ("testing.py",)), - ] - result = get_all_scripts_recursively("scripts", False) - - assert result == dict() - - -# Version file tests - - -def test_get_all_scripts_recursively__given_Version_files_should_return_version_files(): - with mock.patch("os.walk") as mockwalk: - mockwalk.return_value = [ - ("", ("subfolder"), ("V1.1.1__intial.sql",)), - ("subfolder", ("subfolder2"), ("V1.1.2__update.SQL",)), - (f"subfolder{os.sep}subfolder2", (""), ("V1.1.3__update.sql",)), - ] - - result = get_all_scripts_recursively("scripts", 
False) - - assert len(result) == 3 - assert "V1.1.1__intial.sql" in result - assert "V1.1.2__update.SQL" in result - assert "V1.1.3__update.sql" in result - - -def test_get_all_scripts_recursively__given_same_Version_twice_should_raise_exception(): - with mock.patch("os.walk") as mockwalk: - mockwalk.return_value = [ - ("", ("subfolder"), ("V1.1.1__intial.sql",)), - ("subfolder", ("subfolder2"), ("V1.1.1__update.sql",)), - (f"subfolder{os.sep}subfolder2", (""), ("V1.1.2__update.sql",)), - ] - - with pytest.raises(ValueError) as e: - get_all_scripts_recursively("scripts", False) - assert str(e.value).startswith( - "The script version 1.1.1 exists more than once (second instance" - ) - - -def test_get_all_scripts_recursively__given_single_Version_file_should_extract_attributes(): - with mock.patch("os.walk") as mockwalk: - mockwalk.return_value = [ - ("subfolder", (), ("V1.1.1.1__THIS_is_my_test.sql",)), - ] - result = get_all_scripts_recursively("scripts", False) - - assert len(result) == 1 - file_attributes = result["V1.1.1.1__THIS_is_my_test.sql"] - assert file_attributes["script_name"] == "V1.1.1.1__THIS_is_my_test.sql" - assert file_attributes["script_full_path"] == os.path.join( - "subfolder", "V1.1.1.1__THIS_is_my_test.sql" - ) - assert file_attributes["script_type"] == "V" - assert file_attributes["script_version"] == "1.1.1.1" - assert file_attributes["script_description"] == "This is my test" - - -def test_get_all_scripts_recursively__given_single_Version_jinja_file_should_extract_attributes(): - with mock.patch("os.walk") as mockwalk: - mockwalk.return_value = [ - ("subfolder", (), ("V1.1.1.2__THIS_is_my_test.sql.jinja",)), - ] - result = get_all_scripts_recursively("scripts", False) - - assert len(result) == 1 - file_attributes = result["V1.1.1.2__THIS_is_my_test.sql"] - assert file_attributes["script_name"] == "V1.1.1.2__THIS_is_my_test.sql" - assert file_attributes["script_full_path"] == os.path.join( - "subfolder", "V1.1.1.2__THIS_is_my_test.sql.jinja" - ) - assert file_attributes["script_type"] == "V" - assert file_attributes["script_version"] == "1.1.1.2" - assert file_attributes["script_description"] == "This is my test" - - -def test_get_all_scripts_recursively__given_same_version_file_with_and_without_jinja_extension_should_raise_exception(): - with mock.patch("os.walk") as mockwalk: - mockwalk.return_value = [ - ("", (""), ("V1.1.1__intial.sql", "V1.1.1__intial.sql.jinja")), - ] - - with pytest.raises(ValueError) as e: - get_all_scripts_recursively("scripts", False) - assert str(e.value).startswith( - "The script name V1.1.1__intial.sql exists more than once (first_instance" - ) - - -# Always file tests - - -def test_get_all_scripts_recursively__given_Always_files_should_return_always_files(): - with mock.patch("os.walk") as mockwalk: - mockwalk.return_value = [ - ("", ("subfolder"), ("A__proc1.sql",)), - ("subfolder", ("subfolder2"), ("A__proc2.SQL",)), - (f"subfolder{os.sep}subfolder2", (""), ("A__proc3.sql",)), - ] - - result = get_all_scripts_recursively("scripts", False) - - assert len(result) == 3 - assert "A__proc1.sql" in result - assert "A__proc2.SQL" in result - assert "A__proc3.sql" in result - - -def test_get_all_scripts_recursively__given_same_Always_file_should_raise_exception(): - with mock.patch("os.walk") as mockwalk: - mockwalk.return_value = [ - ("", ("subfolder"), ("A__intial.sql",)), - ("subfolder", (), ("A__intial.sql",)), - ] - - with pytest.raises(ValueError) as e: - get_all_scripts_recursively("scripts", False) - assert str(e.value).startswith( - 
"The script name A__intial.sql exists more than once (first_instance " - ) - - -def test_get_all_scripts_recursively__given_single_Always_file_should_extract_attributes(): - with mock.patch("os.walk") as mockwalk: - mockwalk.return_value = [ - ("subfolder", (), ("A__THIS_is_my_test.sql",)), - ] - result = get_all_scripts_recursively("scripts", False) - - assert len(result) == 1 - file_attributes = result["A__THIS_is_my_test.sql"] - assert file_attributes["script_name"] == "A__THIS_is_my_test.sql" - assert file_attributes["script_full_path"] == os.path.join( - "subfolder", "A__THIS_is_my_test.sql" - ) - assert file_attributes["script_type"] == "A" - assert file_attributes["script_version"] == "" - assert file_attributes["script_description"] == "This is my test" - - -def test_get_all_scripts_recursively__given_single_Always_jinja_file_should_extract_attributes(): - with mock.patch("os.walk") as mockwalk: - mockwalk.return_value = [ - ("subfolder", (), ("A__THIS_is_my_test.sql.jinja",)), - ] - result = get_all_scripts_recursively("scripts", False) - - assert len(result) == 1 - file_attributes = result["A__THIS_is_my_test.sql"] - assert file_attributes["script_name"] == "A__THIS_is_my_test.sql" - assert file_attributes["script_full_path"] == os.path.join( - "subfolder", "A__THIS_is_my_test.sql.jinja" - ) - assert file_attributes["script_type"] == "A" - assert file_attributes["script_version"] == "" - assert file_attributes["script_description"] == "This is my test" - - -def test_get_all_scripts_recursively__given_same_Always_file_with_and_without_jinja_extension_should_raise_exception(): - with mock.patch("os.walk") as mockwalk: - mockwalk.return_value = [ - ("", (""), ("A__intial.sql", "A__intial.sql.jinja")), - ] - - with pytest.raises(ValueError) as e: - get_all_scripts_recursively("scripts", False) - assert str(e.value).startswith( - "The script name A__intial.sql exists more than once (first_instance " - ) - - -# Repeatable file tests - - -def test_get_all_scripts_recursively__given_Repeatable_files_should_return_repeatable_files(): - with mock.patch("os.walk") as mockwalk: - mockwalk.return_value = [ - ("", ("subfolder"), ("R__proc1.sql",)), - ("subfolder", ("subfolder2"), ("R__proc2.SQL",)), - (f"subfolder{os.sep}subfolder2", (), ("R__proc3.sql",)), - ] - - result = get_all_scripts_recursively("scripts", False) - - assert len(result) == 3 - assert "R__proc1.sql" in result - assert "R__proc2.SQL" in result - assert "R__proc3.sql" in result - - -def test_get_all_scripts_recursively__given_same_Repeatable_file_should_raise_exception(): - with mock.patch("os.walk") as mockwalk: - mockwalk.return_value = [ - ("", ("subfolder"), ("R__intial.sql",)), - ("subfolder", (), ("R__intial.sql",)), - ] - - with pytest.raises(ValueError) as e: - get_all_scripts_recursively("scripts", False) - assert str(e.value).startswith( - "The script name R__intial.sql exists more than once (first_instance " - ) - - -def test_get_all_scripts_recursively__given_single_Repeatable_file_should_extract_attributes(): - with mock.patch("os.walk") as mockwalk: - mockwalk.return_value = [ - ("subfolder", (), ("R__THIS_is_my_test.sql",)), - ] - result = get_all_scripts_recursively("scripts", False) - - assert len(result) == 1 - file_attributes = result["R__THIS_is_my_test.sql"] - assert file_attributes["script_name"] == "R__THIS_is_my_test.sql" - assert file_attributes["script_full_path"] == os.path.join( - "subfolder", "R__THIS_is_my_test.sql" - ) - assert file_attributes["script_type"] == "R" - assert 
file_attributes["script_version"] == "" - assert file_attributes["script_description"] == "This is my test" - - -def test_get_all_scripts_recursively__given_single_Repeatable_jinja_file_should_extract_attributes(): - with mock.patch("os.walk") as mockwalk: - mockwalk.return_value = [ - ("subfolder", (), ("R__THIS_is_my_test.sql.jinja",)), - ] - result = get_all_scripts_recursively("scripts", False) - - assert len(result) == 1 - file_attributes = result["R__THIS_is_my_test.sql"] - assert file_attributes["script_name"] == "R__THIS_is_my_test.sql" - assert file_attributes["script_full_path"] == os.path.join( - "subfolder", "R__THIS_is_my_test.sql.jinja" - ) - assert file_attributes["script_type"] == "R" - assert file_attributes["script_version"] == "" - assert file_attributes["script_description"] == "This is my test" - - -def test_get_all_scripts_recursively__given_same_Repeatable_file_with_and_without_jinja_extension_should_raise_exception(): - with mock.patch("os.walk") as mockwalk: - mockwalk.return_value = [ - ("", (""), ("R__intial.sql", "R__intial.sql.jinja")), - ] - - with pytest.raises(ValueError) as e: - get_all_scripts_recursively("scripts", False) - assert str(e.value).startswith( - "The script name R__intial.sql exists more than once (first_instance " - ) diff --git a/tests/test_jinja_env_var_template.py b/tests/test_jinja_env_var_template.py deleted file mode 100644 index b5a37c4e..00000000 --- a/tests/test_jinja_env_var_template.py +++ /dev/null @@ -1,51 +0,0 @@ -import os - -import pytest -from jinja2 import DictLoader -from schemachange.cli import JinjaTemplateProcessor - - -def test_from_environ_not_set(): - processor = JinjaTemplateProcessor("", None) - - # overide the default loader - templates = {"test.sql": "some text {{ env_var('MYVAR') }}"} - processor.override_loader(DictLoader(templates)) - - with pytest.raises(ValueError) as e: - processor.render("test.sql", None, True) - - assert ( - str(e.value) - == "Could not find environmental variable MYVAR and no default value was provided" - ) - - -def test_from_environ_set(): - processor = JinjaTemplateProcessor("", None) - - # set MYVAR env variable - os.environ["MYVAR"] = "myvar_from_environment" - - # overide the default loader - templates = {"test.sql": "some text {{ env_var('MYVAR') }}"} - processor.override_loader(DictLoader(templates)) - - context = processor.render("test.sql", None, True) - - # unset MYVAR env variable - del os.environ["MYVAR"] - - assert context == "some text myvar_from_environment" - - -def test_from_environ_not_set_default(): - processor = JinjaTemplateProcessor("", None) - - # overide the default loader - templates = {"test.sql": "some text {{ env_var('MYVAR', 'myvar_default') }}"} - processor.override_loader(DictLoader(templates)) - - context = processor.render("test.sql", None, True) - - assert context == "some text myvar_default" diff --git a/tests/test_load_schemachange_config.py b/tests/test_load_schemachange_config.py deleted file mode 100644 index dd92c903..00000000 --- a/tests/test_load_schemachange_config.py +++ /dev/null @@ -1,68 +0,0 @@ -import os -import pathlib -import unittest.mock as mock - -import pytest - -from schemachange.cli import load_schemachange_config - -# Note Paramters in config file are kebab case and are re-rendered as snake case after 'load_schemachange_config' is called - - -def test__load_schemachange_config__simple_config_file(tmp_path: pathlib.Path): - config_contents = """ -config-version: 1 -root-folder: scripts -modules-folder: modules -vars: - database_name: 
SCHEMACHANGE_DEMO_JINJA -""" - config_file = tmp_path / "schemachange-config.yml" - config_file.write_text(config_contents) - - config = load_schemachange_config(str(config_file)) - - assert config["config-version"] == 1 - assert config["root-folder"] == "scripts" - assert config["modules-folder"] == "modules" - assert config["vars"]["database_name"] == "SCHEMACHANGE_DEMO_JINJA" - - -@mock.patch.dict(os.environ, {"TEST_VAR": "env_value"}) -def test__load_schemachange_config__with_env_var_should_populate_value( - tmp_path: pathlib.Path, -): - config_contents = """ -config-version: 1.1 -root-folder: {{env_var('TEST_VAR')}} -modules-folder: modules -vars: - database_name: SCHEMACHANGE_DEMO_JINJA -""" - config_file = tmp_path / "schemachange-config.yml" - config_file.write_text(config_contents) - - config = load_schemachange_config(str(config_file)) - - assert config["root-folder"] == "env_value" - - -def test__load_schemachange_config__requiring_env_var_but_env_var_not_set_should_raise_exception( - tmp_path: pathlib.Path, -): - config_contents = """ -config-version: 1.1 -root-folder: {{env_var('TEST_VAR')}} -modules-folder: modules -vars: - database_name: SCHEMACHANGE_DEMO_JINJA -""" - config_file = tmp_path / "schemachange-config.yml" - config_file.write_text(config_contents) - - with pytest.raises(ValueError) as e: - load_schemachange_config(str(config_file)) - assert ( - str(e.value) - == "Could not find environmental variable TEST_VAR and no default value was provided" - ) diff --git a/tests/test_main.py b/tests/test_main.py index 7b4e98fc..384c4048 100644 --- a/tests/test_main.py +++ b/tests/test_main.py @@ -1,204 +1,393 @@ +from __future__ import annotations + +import logging import os import tempfile import unittest.mock as mock +from dataclasses import asdict +from pathlib import Path from textwrap import dedent import pytest +from schemachange.config.ChangeHistoryTable import ChangeHistoryTable -import schemachange.cli +import schemachange.cli as cli -DEFAULT_CONFIG = { - "root_folder": os.path.abspath("."), +default_base_config = { + # Shared configuration options + "config_file_path": Path(".") / "schemachange-config.yml", + "root_folder": Path("."), "modules_folder": None, + "config_vars": {}, +} + +default_deploy_config = { + **default_base_config, + # Deploy configuration options "snowflake_account": None, "snowflake_user": None, "snowflake_role": None, "snowflake_warehouse": None, "snowflake_database": None, "snowflake_schema": None, - "change_history_table": None, - "vars": {}, + "change_history_table": ChangeHistoryTable( + table_name="CHANGE_HISTORY", + schema_name="SCHEMACHANGE", + database_name="METADATA", + ), "create_change_history_table": False, "autocommit": False, - "verbose": False, "dry_run": False, "query_tag": None, "oauth_config": None, } +required_args = [ + "--snowflake-account", + "account", + "--snowflake-user", + "user", + "--snowflake-warehouse", + "warehouse", + "--snowflake-role", + "role", +] + +required_config = { + "snowflake_account": "account", + "snowflake_user": "user", + "snowflake_warehouse": "warehouse", + "snowflake_role": "role", +} +script_path = Path(__file__).parent.parent / "demo" / "basics_demo" / "A__basic001.sql" + @pytest.mark.parametrize( - "args, expected", + "to_mock, cli_args, expected_config, expected_script_path", [ - (["schemachange"], DEFAULT_CONFIG), - (["schemachange", "deploy"], DEFAULT_CONFIG), - ( - ["schemachange", "deploy", "-f", "."], - {**DEFAULT_CONFIG, "root_folder": os.path.abspath(".")}, - ), ( - ["schemachange", 
"deploy", "--snowflake-account", "account"], - {**DEFAULT_CONFIG, "snowflake_account": "account"}, + "schemachange.cli.deploy", + ["schemachange", *required_args], + {**default_deploy_config, **required_config}, + None, ), ( - ["schemachange", "deploy", "--snowflake-user", "user"], - {**DEFAULT_CONFIG, "snowflake_user": "user"}, + "schemachange.cli.deploy", + ["schemachange", "deploy", *required_args], + {**default_deploy_config, **required_config}, + None, ), ( - ["schemachange", "deploy", "--snowflake-role", "role"], - {**DEFAULT_CONFIG, "snowflake_role": "role"}, + "schemachange.cli.deploy", + ["schemachange", "deploy", "-f", ".", *required_args], + {**default_deploy_config, **required_config, "root_folder": Path(".")}, + None, ), ( - ["schemachange", "deploy", "--snowflake-warehouse", "warehouse"], - {**DEFAULT_CONFIG, "snowflake_warehouse": "warehouse"}, + "schemachange.cli.deploy", + [ + "schemachange", + "deploy", + *required_args, + "--snowflake-database", + "database", + ], + { + **default_deploy_config, + **required_config, + "snowflake_database": "database", + }, + None, ), ( - ["schemachange", "deploy", "--snowflake-database", "database"], - {**DEFAULT_CONFIG, "snowflake_database": "database"}, + "schemachange.cli.deploy", + ["schemachange", "deploy", *required_args, "--snowflake-schema", "schema"], + {**default_deploy_config, **required_config, "snowflake_schema": "schema"}, + None, ), ( - ["schemachange", "deploy", "--snowflake-schema", "schema"], - {**DEFAULT_CONFIG, "snowflake_schema": "schema"}, + "schemachange.cli.deploy", + [ + "schemachange", + "deploy", + *required_args, + "--change-history-table", + "db.schema.table", + ], + { + **default_deploy_config, + **required_config, + "change_history_table": ChangeHistoryTable( + database_name="db", schema_name="schema", table_name="table" + ), + }, + None, ), ( - ["schemachange", "deploy", "--change-history-table", "db.schema.table"], - {**DEFAULT_CONFIG, "change_history_table": "db.schema.table"}, + "schemachange.cli.deploy", + ["schemachange", "deploy", *required_args, "--vars", '{"var1": "val"}'], + { + **default_deploy_config, + **required_config, + "config_vars": {"var1": "val"}, + }, + None, ), ( - ["schemachange", "deploy", "--vars", '{"var1": "val"}'], + "schemachange.cli.deploy", + ["schemachange", "deploy", *required_args, "--create-change-history-table"], { - **DEFAULT_CONFIG, - "vars": {"var1": "val"}, + **default_deploy_config, + **required_config, + "create_change_history_table": True, }, + None, ), ( - ["schemachange", "deploy", "--create-change-history-table"], - {**DEFAULT_CONFIG, "create_change_history_table": True}, + "schemachange.cli.deploy", + ["schemachange", "deploy", *required_args, "--autocommit"], + {**default_deploy_config, **required_config, "autocommit": True}, + None, ), ( - ["schemachange", "deploy", "--autocommit"], - {**DEFAULT_CONFIG, "autocommit": True}, + "schemachange.cli.deploy", + ["schemachange", "deploy", *required_args, "--verbose"], + {**default_deploy_config, **required_config, "log_level": logging.DEBUG}, + None, ), - (["schemachange", "deploy", "--verbose"], {**DEFAULT_CONFIG, "verbose": True}), - (["schemachange", "deploy", "--dry-run"], {**DEFAULT_CONFIG, "dry_run": True}), ( - ["schemachange", "deploy", "--query-tag", "querytag"], - {**DEFAULT_CONFIG, "query_tag": "querytag"}, + "schemachange.cli.deploy", + ["schemachange", "deploy", *required_args, "--dry-run"], + {**default_deploy_config, **required_config, "dry_run": True}, + None, ), ( + "schemachange.cli.deploy", + 
["schemachange", "deploy", *required_args, "--query-tag", "querytag"], + {**default_deploy_config, **required_config, "query_tag": "querytag"}, + None, + ), + ( + "schemachange.cli.deploy", [ "schemachange", "deploy", + *required_args, "--oauth-config", '{"token-provider-url": "https//..."}', ], { - **DEFAULT_CONFIG, + **default_deploy_config, + **required_config, "oauth_config": {"token-provider-url": "https//..."}, }, + None, ), - ], -) -def test_main_deploy_subcommand_given_arguments_make_sure_arguments_set_on_call( - args, expected -): - with mock.patch("schemachange.cli.deploy_command") as mock_deploy_command: - schemachange.cli.main(args) - mock_deploy_command.assert_called_once() ( + "schemachange.cli.deploy", [ - config, + "schemachange", + "deploy", + *required_args, ], - _call_kwargs, - ) = mock_deploy_command.call_args - assert config == expected - - -@pytest.mark.parametrize( - "args, expected", - [ - (["schemachange", "render", "script.sql"], ({**DEFAULT_CONFIG}, "script.sql")), + { + **default_deploy_config, + **required_config, + "log_level": 20, + }, + None, + ), ( - ["schemachange", "render", "--root-folder", ".", "script.sql"], - ({**DEFAULT_CONFIG, "root_folder": os.path.abspath(".")}, "script.sql"), + "schemachange.cli.render", + [ + "schemachange", + "render", + str(script_path), + ], + {**default_base_config}, + script_path, ), ( - ["schemachange", "render", "--vars", '{"var1": "val"}', "script.sql"], - ({**DEFAULT_CONFIG, "vars": {"var1": "val"}}, "script.sql"), + "schemachange.cli.render", + [ + "schemachange", + "render", + "--root-folder", + ".", + str(script_path), + ], + {**default_base_config, "root_folder": Path(".")}, + script_path, ), ( - ["schemachange", "render", "--verbose", "script.sql"], - ({**DEFAULT_CONFIG, "verbose": True}, "script.sql"), + "schemachange.cli.render", + [ + "schemachange", + "render", + "--vars", + '{"var1": "val"}', + str(script_path), + ], + {**default_base_config, "config_vars": {"var1": "val"}}, + script_path, + ), + ( + "schemachange.cli.render", + [ + "schemachange", + "render", + "--verbose", + str(script_path), + ], + {**default_base_config, "log_level": logging.DEBUG}, + script_path, ), ], ) -def test_main_render_subcommand_given_arguments_make_sure_arguments_set_on_call( - args, expected +@mock.patch("schemachange.session.SnowflakeSession.snowflake.connector.connect") +def test_main_deploy_subcommand_given_arguments_make_sure_arguments_set_on_call( + _, + to_mock: str, + cli_args: list[str], + expected_config: dict, + expected_script_path: Path | None, ): - with mock.patch("schemachange.cli.render_command") as mock_render_command: - schemachange.cli.main(args) - mock_render_command.assert_called_once() - call_args, _call_kwargs = mock_render_command.call_args - assert call_args == expected + with mock.patch.dict(os.environ, {"SNOWFLAKE_PASSWORD": "password"}, clear=True): + with mock.patch("sys.argv", cli_args): + with mock.patch(to_mock) as mock_command: + cli.main() + mock_command.assert_called_once() + _, call_kwargs = mock_command.call_args + for expected_arg, expected_value in expected_config.items(): + actual_value = getattr(call_kwargs["config"], expected_arg) + if hasattr(actual_value, "table_name"): + assert asdict(actual_value) == asdict(expected_value) + else: + assert actual_value == expected_value + if expected_script_path is not None: + assert call_kwargs["script_path"] == expected_script_path @pytest.mark.parametrize( - "args, to_mock, expected_args", + "to_mock, args, expected_config, expected_script_path", [ 
( - ["schemachange", "deploy", "--config-folder", "DUMMY"], - "schemachange.cli.deploy_command", - ({**DEFAULT_CONFIG, "snowflake_account": "account"},), + "schemachange.cli.deploy", + [ + "schemachange", + "deploy", + "--config-folder", + "DUMMY", + ], + { + **default_deploy_config, + "snowflake_user": "user", + "snowflake_warehouse": "warehouse", + "snowflake_role": "role", + "snowflake_account": "account", + }, + None, ), ( - ["schemachange", "render", "script.sql", "--config-folder", "DUMMY"], - "schemachange.cli.render_command", - ({**DEFAULT_CONFIG, "snowflake_account": "account"}, "script.sql"), + "schemachange.cli.render", + [ + "schemachange", + "render", + str(script_path), + "--config-folder", + "DUMMY", + ], + default_base_config, + script_path, ), ], ) -def test_main_deploy_config_folder(args, to_mock, expected_args): - with tempfile.TemporaryDirectory() as d: - with open(os.path.join(d, "schemachange-config.yml"), "w") as f: - f.write( - dedent( - """ - snowflake_account: account - """ +@mock.patch("schemachange.session.SnowflakeSession.snowflake.connector.connect") +def test_main_deploy_config_folder( + _, + to_mock: str, + args: list[str], + expected_config: dict, + expected_script_path: Path | None, +): + with mock.patch.dict(os.environ, {"SNOWFLAKE_PASSWORD": "password"}, clear=True): + with tempfile.TemporaryDirectory() as d: + with open(os.path.join(d, "schemachange-config.yml"), "w") as f: + f.write( + dedent( + """ + snowflake_account: account + snowflake_user: user + snowflake_warehouse: warehouse + snowflake_role: role + """ + ) ) - ) - args[args.index("DUMMY")] = d + args[args.index("DUMMY")] = d + expected_config["config_file_path"] = Path(d) / "schemachange-config.yml" - with mock.patch(to_mock) as mock_command: - schemachange.cli.main(args) - mock_command.assert_called_once() - call_args, _call_kwargs = mock_command.call_args - assert call_args == expected_args + with mock.patch(to_mock) as mock_command: + with mock.patch("sys.argv", args): + cli.main() + mock_command.assert_called_once() + _, call_kwargs = mock_command.call_args + for expected_arg, expected_value in expected_config.items(): + actual_value = getattr(call_kwargs["config"], expected_arg) + if hasattr(actual_value, "table_name"): + assert asdict(actual_value) == asdict(expected_value) + else: + assert actual_value == expected_value + if expected_script_path is not None: + assert call_kwargs["script_path"] == expected_script_path @pytest.mark.parametrize( - "args, to_mock, expected_args", + "to_mock, args, expected_config, expected_script_path", [ ( - ["schemachange", "deploy", "--modules-folder", "DUMMY"], - "schemachange.cli.deploy_command", - ({**DEFAULT_CONFIG, "modules_folder": "DUMMY"},), + "schemachange.cli.deploy", + ["schemachange", "deploy", *required_args, "--modules-folder", "DUMMY"], + {**default_deploy_config, **required_config, "modules_folder": "DUMMY"}, + None, ), ( - ["schemachange", "render", "script.sql", "--modules-folder", "DUMMY"], - "schemachange.cli.render_command", - ({**DEFAULT_CONFIG, "modules_folder": "DUMMY"}, "script.sql"), + "schemachange.cli.render", + [ + "schemachange", + "render", + str(script_path), + "--modules-folder", + "DUMMY", + ], + {**default_base_config, "modules_folder": "DUMMY"}, + script_path, ), ], ) -def test_main_deploy_modules_folder(args, to_mock, expected_args): - with tempfile.TemporaryDirectory() as d: - args[args.index("DUMMY")] = d - expected_args[0]["modules_folder"] = d - - with mock.patch(to_mock) as mock_command: - 
diff --git a/tests/test_redact_config_secrets.py b/tests/test_redact_config_secrets.py
new file mode 100644
index 00000000..2801f0a9
--- /dev/null
+++ b/tests/test_redact_config_secrets.py
@@ -0,0 +1,100 @@
+from __future__ import annotations
+
+import pytest
+import structlog
+
+from schemachange.redact_config_secrets import (
+    get_redact_config_secrets_processor,
+    redact_config_secrets,
+)
+
+
+class TestRedactConfigSecrets:
+    def test_skip_processor_config_with_empty_config_secrets(self):
+        prev_cfg = structlog.get_config()
+        redact_config_secrets(config_secrets=set())
+        new_cfg = structlog.get_config()
+
+        assert len(prev_cfg["processors"]) == len(new_cfg["processors"])
+
+    def test_processor_config_with_populated_config_secrets(self):
+        prev_cfg = structlog.get_config()
+        redact_config_secrets(config_secrets={"secret"})
+        new_cfg = structlog.get_config()
+
+        assert len(prev_cfg["processors"]) + 1 == len(new_cfg["processors"])
+
+
+class TestGetRedactConfigSecretsProcessor:
+    @pytest.mark.parametrize(
+        "secrets, extra_kwargs, expected",
+        [
+            ({}, {"keyword": "secret"}, {"keyword": "secret"}),
+            ({}, {}, {}),
+            ({"secret"}, {"keyword": "secret"}, {"keyword": "******"}),
+            (
+                {"secret"},
+                {"keyword": {"keyword": "secret"}},
+                {"keyword": {"keyword": "******"}},
+            ),
+            (
+                {"secret"},
+                {"keyword": {"keyword": {"keyword": "secret"}}},
+                {"keyword": {"keyword": {"keyword": "******"}}},
+            ),
+            (
+                {"12345"},
+                {"keyword": {"keyword": {"keyword": 12345}}},
+                {"keyword": {"keyword": {"keyword": "*****"}}},
+            ),
+        ],
+    )
+    def test_happy_path(self, secrets: set[str], extra_kwargs: dict, expected: dict):
+        redact_config_secrets_processor = get_redact_config_secrets_processor(
+            config_secrets=secrets
+        )
+
+        # noinspection PyTypeChecker
+        result = redact_config_secrets_processor(
+            None, "info", {"event": "event text", "level": "info", **extra_kwargs}
+        )
+
+        assert result == {"event": "event text", "level": "info", **expected}
+
+    @pytest.mark.parametrize(
+        "extra_kwargs, expected_warning",
+        [
+            (
+                {
+                    "keyword": {
+                        "keyword": {
+                            "keyword": {
+                                "keyword": {
+                                    "keyword": {
+                                        "keyword": {"keyword": {"keyword": "secret"}}
+                                    }
+                                }
+                            }
+                        }
+                    }
+                },
+                "Unable to redact deeply nested secrets in log",
+            ),
+            (
+                {"keyword": object()},
+                "Unable to redact object log arguments in log",
+            ),
+        ],
+    )
+    def test_warnings(self, extra_kwargs: dict, expected_warning: str):
+        redact_config_secrets_processor = get_redact_config_secrets_processor(
+            config_secrets={"secret"}
+        )
+
+        with pytest.warns(UserWarning) as e:
+            # noinspection PyTypeChecker
+            redact_config_secrets_processor(
+                None, "info", {"event": "event text", "level": "info", **extra_kwargs}
+            )
+
+        assert expected_warning in str(e[0].message)