From 32d440fdd977b90400064d66c54d0215fd7e83c1 Mon Sep 17 00:00:00 2001 From: Jason Freeberg Date: Tue, 8 Aug 2023 15:48:17 -0700 Subject: [PATCH] v1.2.0 update (#7) - Add setup.py and editable install - Update environment.yml to create the editable install - Sproc now returns a table - Set OpenSSL version to address this issue: https://github.com/wbond/oscrypto/issues/75 - Consolidate test req's into requirements.txt - Update README for the corresponding changes above --- README.md | 50 ++++++++++++---------------- app.toml | 1 - environment.yml | 9 +++-- requirements-test.txt | 2 -- requirements.txt | 4 ++- resources.sql | 6 ++-- setup.py | 12 +++++++ src/{procs => }/__init__.py | 0 src/app.py | 48 +++++++++++++++++++++++++++ src/{udf => }/functions.py | 2 +- src/procs/app.py | 57 -------------------------------- src/udf/__init__.py | 0 test/conftest.py | 16 +++++++++ test/procs/test_app.py | 24 -------------- test/test_app.py | 17 ++++++++++ test/{udf => }/test_functions.py | 6 +++- 16 files changed, 131 insertions(+), 123 deletions(-) delete mode 100644 requirements-test.txt create mode 100644 setup.py rename src/{procs => }/__init__.py (100%) create mode 100644 src/app.py rename src/{udf => }/functions.py (100%) delete mode 100644 src/procs/app.py delete mode 100644 src/udf/__init__.py create mode 100644 test/conftest.py delete mode 100644 test/procs/test_app.py create mode 100644 test/test_app.py rename test/{udf => }/test_functions.py (61%) diff --git a/README.md b/README.md index 3cd7a3e..a03744d 100644 --- a/README.md +++ b/README.md @@ -8,18 +8,20 @@ Set the following environment variables with your Snowflake account information: ```bash # Linux/MacOS -set SNOWSQL_ACCOUNT= -set SNOWSQL_USER= -set SNOWSQL_PWD= -set SNOWSQL_DATABASE= -set SNOWSQL_SCHEMA= -set SNOWSQL_WAREHOUSE= +export SNOWSQL_ACCOUNT= +export SNOWSQL_USER= +export SNOWSQL_ROLE= +export SNOWSQL_PWD= +export SNOWSQL_DATABASE= +export SNOWSQL_SCHEMA= +export SNOWSQL_WAREHOUSE= ``` 
```powershell # Windows/PowerShell $env:SNOWSQL_ACCOUNT = "" $env:SNOWSQL_USER = "" +$env:SNOWSQL_ROLE = "" $env:SNOWSQL_PWD = "" $env:SNOWSQL_DATABASE = "" $env:SNOWSQL_SCHEMA = "" @@ -31,38 +33,37 @@ using the System Properties menu (on Windows). ### Install dependencies -Set up a virtual environment using [Anaconda](https://conda.io/projects/conda/en/latest/user-guide/tasks/manage-environments.html#creating-an-environment-with-commands) or [virtualenv](https://docs.python.org/3/library/venv.html). - -#### Anaconda +Create and activate a conda environment using [Anaconda](https://conda.io/projects/conda/en/latest/user-guide/tasks/manage-environments.html#creating-an-environment-with-commands): ```bash -conda env create -f environment.yml +conda env create --file environment.yml conda activate snowpark ``` -#### Virtualenv +### Configure IDE -```bash -python3 -m venv venv -source venv/bin/activate -pip install -r requirements.txt -``` +#### VS Code + +Press `Ctrl`+`Shift`+`P` to open the command palette, then select **Python: Select Interpreter** and select the **snowpark** interpreter under the **Conda** list. + +#### PyCharm + +Go to **File** > **Settings** > **Project** > **Python Interpreter** and select the snowpark interpreter. ## Prereqs To develop your applications locally, you will need - A Snowflake account -- Python 3.8 +- Python 3.8 or greater - An IDE or code editor (VS Code, PyCharm, etc.) 
## Usage Once you've set your credentials and installed the packages, you can test your connection to Snowflake by executing the stored procedure in [`app.py`](src/procs/app.py): -``` -cd src -python procs/app.py +```bash +python src/app.py ``` You should see the following output: @@ -80,8 +81,7 @@ You should see the following output: You can run the test suite locally from the project root: -``` -pip install -r requirements-test.txt +```bash python -m pytest ``` @@ -90,12 +90,6 @@ python -m pytest The GitHub Actions [workflow file](.github/workflows/build-and-deploy.yml) allows you to continously deploy your objects to Snowflake. When you're ready, create secrets in your GitHub repository with the same name and values as the environment variables you created earler (`SNOWSQL_PWD`, `SNOWSQL_ACCOUNT`, etc.). The workflow will create a stage, upload the Python source code, and create the stored procedure object. For more information, see [`resources.sql`](resources.sql). -## Project Structure - -- [procs/](src/procs/): Directory for stored procedures -- [udf/](src/udf/): Directory for your user-defined functions -- [util/](src/util/): Directory for methods/classes shared between UDFs and procedures - ## Docs - [Snowpark Developer Guide for Python](https://docs.snowflake.com/en/developer-guide/snowpark/python/index) diff --git a/app.toml b/app.toml index c64b834..700ad49 100644 --- a/app.toml +++ b/app.toml @@ -1,7 +1,6 @@ snowsql_config_path = "" snowsql_connection_name = "" -[dev] database = "" schema = "" role = "" diff --git a/environment.yml b/environment.yml index e376b9d..5b1d311 100644 --- a/environment.yml +++ b/environment.yml @@ -5,10 +5,9 @@ name: snowpark channels: - snowflake dependencies: - - python=3.8 + - python=3.9 + - openssl=3.0.9 # Addresses this issue with oscrypto: https://github.com/wbond/oscrypto/issues/75 - pip - - snowflake-snowpark-python - - toml - - tomli - pip: - - "-r requirements-test.txt" + - "-r requirements.txt" + - "--editable ." 
diff --git a/requirements-test.txt b/requirements-test.txt deleted file mode 100644 index 6ca258f..0000000 --- a/requirements-test.txt +++ /dev/null @@ -1,2 +0,0 @@ -pytest -snowflake-vcrpy @ git+https://github.com/Snowflake-Labs/snowflake-vcrpy.git@v0.1.1 diff --git a/requirements.txt b/requirements.txt index c32f41c..e8deba4 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,5 @@ -snowflake-snowpark-python +snowflake-snowpark-python[pandas] tomli toml +pytest +snowflake-vcrpy @ git+https://github.com/Snowflake-Labs/snowflake-vcrpy.git@v0.1.1 diff --git a/resources.sql b/resources.sql index ba46761..768a519 100644 --- a/resources.sql +++ b/resources.sql @@ -6,11 +6,11 @@ CREATE STAGE IF NOT EXISTS artifacts; PUT file://&artifact_name @artifacts AUTO_COMPRESS=FALSE OVERWRITE=TRUE; CREATE OR REPLACE PROCEDURE HELLO_WORLD_PROC() - RETURNS integer + RETURNS TABLE() LANGUAGE PYTHON RUNTIME_VERSION = 3.8 IMPORTS = ('@artifacts/&artifact_name') - HANDLER = 'src.procs.app.run' + HANDLER = 'src.app.run' PACKAGES = ('pytest','snowflake-snowpark-python','tomli','toml'); CREATE OR REPLACE FUNCTION COMBINE(a String, b String) @@ -18,5 +18,5 @@ CREATE OR REPLACE FUNCTION COMBINE(a String, b String) LANGUAGE PYTHON RUNTIME_VERSION = 3.8 IMPORTS = ('@artifacts/&artifact_name') - HANDLER = 'src.udf.functions.combine' + HANDLER = 'src.functions.combine' PACKAGES = ('pytest','snowflake-snowpark-python','tomli','toml'); diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..19a6bf4 --- /dev/null +++ b/setup.py @@ -0,0 +1,12 @@ +""" +Run `conda env create --file environment.yml` to create an editable +install of this project +""" + +from setuptools import setup, find_packages + +setup( + name="Example Snowpark Python project", + version="0.1.0", + packages=find_packages() +) diff --git a/src/procs/__init__.py b/src/__init__.py similarity index 100% rename from src/procs/__init__.py rename to src/__init__.py diff --git a/src/app.py b/src/app.py new file mode 
100644 index 0000000..95b1ff4 --- /dev/null +++ b/src/app.py @@ -0,0 +1,48 @@ +""" +An example stored procedure. __main__ provides an entrypoint for local development +and testing. +""" + +from snowflake.snowpark.session import Session +from snowflake.snowpark.dataframe import col, DataFrame +from snowflake.snowpark.functions import udf +from src import functions + +def run(snowpark_session: Session) -> DataFrame: + """ + A sample stored procedure which creates a small DataFrame, combines its two + columns with a UDF, and returns the result sorted in descending order. + """ + + combine_udf = udf(functions.combine) + + schema = ["col_1", "col_2"] + + data = [ + ("Welcome to ", "Snowflake!"), + ("Learn more: ", "https://www.snowflake.com/snowpark/"), + ] + + df = snowpark_session.create_dataframe(data, schema) + + df2 = df.select(combine_udf(col("col_1"), col("col_2")).as_("hello_world")).sort( + "hello_world", ascending=False + ) + + return df2 + + +if __name__ == "__main__": + # This entrypoint is used for local development (`$ python src/app.py`) + + from src.util.local import get_env_var_config + + print("Creating session...") + session = Session.builder.configs(get_env_var_config()).create() + session.add_import(functions.__file__, 'src.functions') + + print("Running stored procedure...") + result = run(session) + + print("Stored procedure complete:") + result.show() diff --git a/src/udf/functions.py b/src/functions.py similarity index 100% rename from src/udf/functions.py rename to src/functions.py index 0e37926..9b5834a 100644 --- a/src/udf/functions.py +++ b/src/functions.py @@ -2,9 +2,9 @@ This module contains the UDFs for the project. """ - def combine(string_a: str, string_b: str) -> str: """ A sample UDF implementation """ + return string_a + string_b diff --git a/src/procs/app.py b/src/procs/app.py deleted file mode 100644 index 7d18916..0000000 --- a/src/procs/app.py +++ /dev/null @@ -1,57 +0,0 @@ -""" -An example stored procedure. 
__main__ provides an entrypoint for local development -and testing. -""" - -from snowflake.snowpark.session import Session -from snowflake.snowpark.dataframe import col, DataFrame -from snowflake.snowpark.types import StringType - - -def run(snowpark_session: Session) -> int: - """ - A sample stored procedure which creates a small DataFrame, prints it to the - console, and returns the number of rows in the table. - """ - - # Register UDF - from src.udf.functions import combine - - snowpark_session.add_import( - path="../src/udf/functions.py", import_path="src.udf.functions" - ) - combine = snowpark_session.udf.register( - combine, StringType(), input_types=[StringType(), StringType()] - ) - - schema = ["col_1", "col_2"] - - data = [ - ("Welcome to ", "Snowflake!"), - ("Learn more: ", "https://www.snowflake.com/snowpark/"), - ] - - df: DataFrame = snowpark_session.create_dataframe(data, schema) - - df2 = df.select(combine(col("col_1"), col("col_2")).as_("Hello world")).sort( - "Hello world", ascending=False - ) - - df2.show() - return df2.count() - - -if __name__ == "__main__": - # This entrypoint is used for local development. 
- - import sys - - sys.path.insert(0, "..") # Necessary to import from udf and util directories - - from src.util.local import get_env_var_config - - print("Creating session...") - session = Session.builder.configs(get_env_var_config()).create() - - print("Running stored proc...") - run(session) diff --git a/src/udf/__init__.py b/src/udf/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/test/conftest.py b/test/conftest.py new file mode 100644 index 0000000..332d54e --- /dev/null +++ b/test/conftest.py @@ -0,0 +1,16 @@ +""" +Fixtures and configurations for the PyTest suite +""" + +import pytest +from snowflake.snowpark.session import Session +from src.util.local import get_env_var_config + +@pytest.fixture +def session(scope='module'): + # pylint: disable=unused-argument + """ + Creates a Session object for tests + """ + + return Session.builder.configs(get_env_var_config()).create() diff --git a/test/procs/test_app.py b/test/procs/test_app.py deleted file mode 100644 index cfe38d7..0000000 --- a/test/procs/test_app.py +++ /dev/null @@ -1,24 +0,0 @@ -import os -import pytest -from snowflake.snowpark.session import Session -from snowflake.snowpark.types import StringType -from src.util.local import get_env_var_config -from src.procs.app import run - - -@pytest.fixture(autouse=True) -def set_working_directory(): - # Sets the working directory to sources root so relative imports resolve properly - os.chdir("src") - - -@pytest.fixture -def local_session(): - return Session.builder.configs(get_env_var_config()).create() - - -@pytest.mark.snowflake_vcr -def test_app_dim(local_session): - expected_n_rows = 2 - actual_n_rows = run(local_session) - assert expected_n_rows == actual_n_rows diff --git a/test/test_app.py b/test/test_app.py new file mode 100644 index 0000000..284d699 --- /dev/null +++ b/test/test_app.py @@ -0,0 +1,17 @@ +""" +Tests for the procedure module. 
+""" + +from snowflake.snowpark.session import Session +from src import functions +from src.app import run + +def test_app_dim(session: Session): + session.add_import(functions.__file__, 'src.functions') + expected = session.create_dataframe( + [["Welcome to Snowflake!"], ["Learn more: https://www.snowflake.com/snowpark/"]], + ["hello_world"]) + + actual = run(session) + + assert expected.collect() == actual.collect() diff --git a/test/udf/test_functions.py b/test/test_functions.py similarity index 61% rename from test/udf/test_functions.py rename to test/test_functions.py index 2d74f3a..9898a25 100644 --- a/test/udf/test_functions.py +++ b/test/test_functions.py @@ -1,7 +1,11 @@ -from src.udf.functions import combine +""" +Tests for the functions module. +""" +from src.functions import combine def test_combine(): expected = "hello world" actual = combine("hello ", "world") + assert expected == actual