Skip to content

Commit

Permalink
Initial commit
Browse files Browse the repository at this point in the history
  • Loading branch information
sfc-gh-jhansen committed Sep 27, 2023
1 parent a8f4396 commit 110239a
Show file tree
Hide file tree
Showing 24 changed files with 796 additions and 1 deletion.
19 changes: 19 additions & 0 deletions .devcontainer/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# Dev container image for the Snowflake demo Codespace: the devcontainers
# miniconda base plus the SnowSQL CLI and the conda environment declared in
# environment.yml.
FROM mcr.microsoft.com/devcontainers/miniconda:0-3

# Everything below runs as the non-root dev user, so files created by RUN steps
# under the home directory belong to that user.
USER vscode
WORKDIR /home/vscode

# Configure SnowSQL
# COPY creates files owned by root unless --chown is given, regardless of USER.
# The connection file ships with placeholder values that the user has to edit,
# so it must be owned by vscode.
RUN mkdir .snowsql
COPY --chown=vscode:vscode .devcontainer/config .snowsql/

# Install SnowSQL
# The installer version is declared once so the download, run and cleanup steps
# cannot drift apart. Override with --build-arg SNOWSQL_VERSION=<x.y.z>.
# NOTE(review): the "bootstrap/1.2" URL segment presumably has to match the
# major.minor of SNOWSQL_VERSION - confirm before bumping to another minor.
ARG SNOWSQL_VERSION=1.2.28
# --fail makes curl exit non-zero on an HTTP error instead of saving the error
# page, which would otherwise be passed to bash as if it were the installer.
RUN curl --fail --silent --show-error --location --remote-name \
        "https://sfc-repo.snowflakecomputing.com/snowsql/bootstrap/1.2/linux_x86_64/snowsql-${SNOWSQL_VERSION}-linux_x86_64.bash" \
    && SNOWSQL_DEST=~/bin SNOWSQL_LOGIN_SHELL=~/.profile bash "snowsql-${SNOWSQL_VERSION}-linux_x86_64.bash" \
    && rm "snowsql-${SNOWSQL_VERSION}-linux_x86_64.bash"

# Create the conda environment
# environment.yml is only needed while the environment is being created; the
# file is named explicitly rather than relying on conda's working-directory
# default.
COPY --chown=vscode:vscode environment.yml .
RUN conda env create --file environment.yml \
    && conda init \
    && rm environment.yml
9 changes: 9 additions & 0 deletions .devcontainer/config
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# SnowSQL config
# Copied to .snowsql/config in the vscode home directory by .devcontainer/Dockerfile.
# The account, user and password values below appear to be placeholders:
# replace them with your own values locally and do not commit real credentials.

# "dev" is the connection name that deploy_snowpark_apps.py passes to `snow login -C dev`.
[connections.dev]
accountname = myaccount
username = myusername
password = mypassword
# Role, warehouse and database names match the objects created in steps/03_setup_snowflake.sql.
rolename = HOL_ROLE
warehousename = HOL_WH
dbname = HOL_DB
31 changes: 31 additions & 0 deletions .devcontainer/devcontainer.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
// For format details, see https://aka.ms/devcontainer.json. For config options, see the
// README at: https://github.com/devcontainers/templates/tree/main/src/miniconda
{
	"name": "Snowflake Demo Codespace",
	"build": {
		// The build context is the repository root so the Dockerfile can COPY
		// both .devcontainer/config and environment.yml.
		"context": "..",
		"dockerfile": "Dockerfile"
	},

	// Features to add to the dev container. More info: https://containers.dev/features.
	// "features": {},

	// Use 'forwardPorts' to make a list of ports inside the container available locally.
	// "forwardPorts": [],

	// Use 'postCreateCommand' to run commands after the container is created.
	// "postCreateCommand": "conda init",

	// Configure tool-specific properties.
	"customizations": {
		"vscode": {
			"settings": {
				// Point at the interpreter binary of the "snowflake-demo" conda
				// environment (the name declared in environment.yml), not at the
				// environment's root directory.
				"python.defaultInterpreterPath": "/opt/conda/envs/snowflake-demo/bin/python",
				"python.terminal.activateEnvInCurrentTerminal": true
			},
			"extensions": [
				"snowflake.snowflake-vsc"
			]
		}
	}
}
48 changes: 48 additions & 0 deletions .github/workflows/build_and_deploy.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
name: Deploy Snowpark Apps

# Controls when the action will run.
on:
  push:
    branches:
      - main

  # Allows you to run this workflow manually from the Actions tab
  workflow_dispatch:

# The job only needs to read the repository; the Snowflake credentials come
# from repository secrets, not from the workflow token.
permissions:
  contents: read

jobs:
  deploy:
    runs-on: ubuntu-latest

    steps:
      # Checks-out your repository under $GITHUB_WORKSPACE, so your job can access it
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Setup Python 3.10
        uses: actions/setup-python@v5
        with:
          python-version: '3.10'

      - name: Install Python packages
        run: pip install -r requirements.txt

      # Writes the SnowSQL-style connection file that deploy_snowpark_apps.py
      # hands to `snow login -c <workspace>/config -C dev`.
      - name: Configure snowcli
        env:
          SNOWSQL_ACCOUNT: ${{ secrets.SNOWSQL_ACCOUNT }}
          SNOWSQL_USER: ${{ secrets.SNOWSQL_USER }}
          SNOWSQL_PWD: ${{ secrets.SNOWSQL_PWD }}
          SNOWSQL_ROLE: ${{ secrets.SNOWSQL_ROLE }}
          SNOWSQL_WAREHOUSE: ${{ secrets.SNOWSQL_WAREHOUSE }}
          SNOWSQL_DATABASE: ${{ secrets.SNOWSQL_DATABASE }}
        run: |
          cd "$GITHUB_WORKSPACE"
          # The file contains a plaintext password: create it owner-readable only.
          umask 077
          {
            echo "[connections.dev]"
            echo "accountname = $SNOWSQL_ACCOUNT"
            echo "username = $SNOWSQL_USER"
            echo "password = $SNOWSQL_PWD"
            echo "rolename = $SNOWSQL_ROLE"
            echo "warehousename = $SNOWSQL_WAREHOUSE"
            echo "dbname = $SNOWSQL_DATABASE"
          } > config

      - name: Deploy Snowpark apps
        run: python deploy_snowpark_apps.py "$GITHUB_WORKSPACE"
135 changes: 135 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,135 @@
# Mac files
.DS_Store

# Snowpark specific files
creds.json

# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
pip-wheel-metadata/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
.python-version

# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock

# PEP 582; used by e.g. github.com/David-OConnor/pyflow
__pypackages__/

# Celery stuff
celerybeat-schedule
celerybeat.pid

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/
1 change: 1 addition & 0 deletions LEGAL.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
This application is not part of the Snowflake Service and is governed by the terms in LICENSE, unless expressly agreed to in writing. You use this application at your own risk, and Snowflake has no obligation to support your use of this application.
10 changes: 9 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1 +1,9 @@
# sfguide-data-engineering-with-snowpark-python-intro
# Intro to Data Engineering with Snowpark Python
This repository contains the code for the *Intro to Data Engineering with Snowpark Python* Snowflake Quickstart.

### ➡️ For an overview, prerequisites, and more details, complete the end-to-end tutorial [Intro to Data Engineering with Snowpark Python](https://quickstarts.snowflake.com/) on quickstarts.snowflake.com.

___
Here is an overview of what we'll build in this lab:

<img src="images/demo_overview.png" alt="Overview of the demo built in this lab" width="800">
Binary file added data/location.xlsx
Binary file not shown.
Binary file added data/order_detail.xlsx
Binary file not shown.
42 changes: 42 additions & 0 deletions deploy_snowpark_apps.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
import os
import subprocess
import sys

# Folder names that never contain deployable apps and are not walked into.
ignore_folders = ['__pycache__', '.ipynb_checkpoints']


def find_snowpark_apps(root_directory):
    """Yield ``(directory_path, app_type)`` for each Snowpark app under a root.

    A folder counts as an app when it contains an ``app.toml`` file. The app
    type is ``"procedure"`` when the folder also contains
    ``local_connection.py`` and ``"function"`` otherwise.

    Args:
        root_directory: Directory to search recursively.

    Yields:
        Tuples of the app folder path (as produced by ``os.walk``) and the
        app type string.
    """
    # Walk the entire directory structure recursively
    for (directory_path, directory_names, file_names) in os.walk(root_directory):
        # Prune ignored folders in place so os.walk never descends into them,
        # and sort the rest so apps are visited in a stable order.
        directory_names[:] = sorted(
            name for name in directory_names if name not in ignore_folders
        )

        # Pruning only affects sub-folders; this covers a root that is itself
        # an ignored folder.
        if os.path.basename(directory_path) in ignore_folders:
            continue

        # An app.toml file in the folder is our indication that this folder
        # contains a snowcli Snowpark App
        if "app.toml" not in file_names:
            continue

        # Next determine what type of app it is
        if "local_connection.py" in file_names:
            yield directory_path, "procedure"
        else:
            yield directory_path, "function"


def deploy_snowpark_app(directory_path, app_type, config_path):
    """Deploy one Snowpark app with the snowcli tool.

    Args:
        directory_path: Folder containing the app's ``app.toml``.
        app_type: ``"procedure"`` or ``"function"``; used as the ``snow``
            sub-command.
        config_path: Path of the connection config file passed to
            ``snow login``.

    Raises:
        subprocess.CalledProcessError: If a ``snow`` command exits non-zero.
        OSError: If the ``snow`` executable cannot be started.
    """
    # flush so these lines appear before the child process output in CI logs
    print(f"Found {app_type} app in folder {directory_path}", flush=True)
    print(f"Calling snowcli to deploy the {app_type} app", flush=True)
    # Commands run with cwd= instead of os.chdir so the process-wide working
    # directory (and therefore an in-progress os.walk) is left untouched, and
    # as argument lists so paths are never re-parsed by a shell.
    # snow login will update the app.toml file with the correct path to the snowsql config file
    subprocess.run(
        ["snow", "login", "-c", config_path, "-C", "dev"],
        cwd=directory_path,
        check=True,
    )
    subprocess.run(["snow", app_type, "create"], cwd=directory_path, check=True)


def main(argv):
    """Deploy every Snowpark app found below the root directory in ``argv``.

    Args:
        argv: Command-line vector; ``argv[1]`` must be the root directory.

    Returns:
        Process exit status: 0 when every app deployed, 1 on a usage error or
        when at least one deployment failed.
    """
    if len(argv) != 2:
        print("Root directory is required")
        return 1

    # Absolute path so the config location stays valid for commands that run
    # inside the individual app folders.
    root_directory = os.path.abspath(argv[1])
    config_path = os.path.join(root_directory, "config")
    print(f"Deploying all Snowpark apps in root directory {root_directory}", flush=True)

    failed_folders = []
    for directory_path, app_type in find_snowpark_apps(root_directory):
        try:
            deploy_snowpark_app(directory_path, app_type, config_path)
        except (subprocess.CalledProcessError, OSError) as error:
            # Keep deploying the remaining apps, but remember the failure so
            # the overall run is reported as failed.
            print(f"Failed to deploy {app_type} app in folder {directory_path}: {error}", flush=True)
            failed_folders.append(directory_path)

    return 1 if failed_folders else 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))
11 changes: 11 additions & 0 deletions environment.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# Conda environment for the dev container. The environment name is the one
# referenced by python.defaultInterpreterPath in .devcontainer/devcontainer.json.
name: snowflake-demo

channels:
  - https://repo.anaconda.com/pkgs/snowflake
  - nodefaults

dependencies:
  # Same Python version the build_and_deploy workflow sets up.
  - python=3.10
  - snowflake-snowpark-python
  - pip
  # Packages installed with pip inside the environment.
  - pip:
      # Snowflake
      - snowflake-cli-labs==0.2.9
Binary file added images/demo_overview.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
2 changes: 2 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# Packages installed by the build_and_deploy workflow via `pip install -r requirements.txt`.
snowflake-snowpark-python[pandas]
# Same version pin as the pip section of environment.yml.
snowflake-cli-labs==0.2.9
1 change: 1 addition & 0 deletions steps/01_overview.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Please read and follow the instructions in step 1 of the [Intro to Data Engineering with Snowpark Python](https://quickstarts.snowflake.com/) quickstart.
1 change: 1 addition & 0 deletions steps/02_setup_quickstart.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Please read and follow the instructions in step 2 of the [Intro to Data Engineering with Snowpark Python](https://quickstarts.snowflake.com/) quickstart.
54 changes: 54 additions & 0 deletions steps/03_setup_snowflake.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
/*-----------------------------------------------------------------------------
Hands-On Lab: Intro to Data Engineering with Snowpark Python
Script: 03_setup_snowflake.sql
Author: Jeremiah Hansen
Last Updated: 9/26/2023

Purpose: Creates the role, database, warehouse, schema and external stage
used by the lab. Every object is created with CREATE OR REPLACE, so
re-running the script replaces existing objects of the same name.
-----------------------------------------------------------------------------*/


-- ----------------------------------------------------------------------------
-- Step #1: Accept Anaconda Terms & Conditions
-- ----------------------------------------------------------------------------

-- Manual step: nothing is executed in this section.
-- See Getting Started section in Third-Party Packages (https://docs.snowflake.com/en/developer-guide/udf/python/udf-python-packages.html#getting-started)


-- ----------------------------------------------------------------------------
-- Step #2: Create the account level objects (ACCOUNTADMIN part)
-- ----------------------------------------------------------------------------
USE ROLE ACCOUNTADMIN;

-- Roles
-- Capture the user running the script in a session variable so the new role
-- can be granted to that user below via IDENTIFIER($MY_USER).
SET MY_USER = CURRENT_USER();
CREATE OR REPLACE ROLE HOL_ROLE;
GRANT ROLE HOL_ROLE TO ROLE SYSADMIN;
GRANT ROLE HOL_ROLE TO USER IDENTIFIER($MY_USER);

-- Account-level privileges for the lab role
GRANT EXECUTE TASK ON ACCOUNT TO ROLE HOL_ROLE;
GRANT MONITOR EXECUTION ON ACCOUNT TO ROLE HOL_ROLE;
GRANT IMPORTED PRIVILEGES ON DATABASE SNOWFLAKE TO ROLE HOL_ROLE;

-- Databases
-- Ownership is transferred to HOL_ROLE, the role that Step #3 runs as.
CREATE OR REPLACE DATABASE HOL_DB;
GRANT OWNERSHIP ON DATABASE HOL_DB TO ROLE HOL_ROLE;

-- Warehouses
-- Smallest size, with auto-suspend and auto-resume enabled
-- (AUTO_SUSPEND is expressed in seconds per the Snowflake documentation).
CREATE OR REPLACE WAREHOUSE HOL_WH WAREHOUSE_SIZE = XSMALL, AUTO_SUSPEND = 300, AUTO_RESUME= TRUE;
GRANT OWNERSHIP ON WAREHOUSE HOL_WH TO ROLE HOL_ROLE;


-- ----------------------------------------------------------------------------
-- Step #3: Create the database level objects
-- ----------------------------------------------------------------------------
-- From here on the script runs as HOL_ROLE, using the warehouse and database
-- created in Step #2.
USE ROLE HOL_ROLE;
USE WAREHOUSE HOL_WH;
USE DATABASE HOL_DB;

-- Schemas
CREATE OR REPLACE SCHEMA HOL_SCHEMA;

-- External Frostbyte objects
-- External stage over the S3 location below; no credentials are supplied in
-- this statement.
USE SCHEMA HOL_SCHEMA;
CREATE OR REPLACE STAGE FROSTBYTE_RAW_STAGE
URL = 's3://sfquickstarts/data-engineering-with-snowpark-python/'
;
Loading

0 comments on commit 110239a

Please sign in to comment.