diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index ea30d5a8..92e4456c 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -28,30 +28,3 @@ jobs: VALIDATE_BASH_EXEC: true VALIDATE_DOCKERFILE_HADOLINT: true VALIDATE_YAML: true - -# build_python_kernel: -# permissions: -# id-token: write -# contents: read -# packages: write -# actions: write -# uses: ./.github/workflows/reusable-docker-build.yml -# strategy: -# matrix: -# # Must be a supported version by jupyter/datascience-notebook -# # https://hub.docker.com/r/jupyter/datascience-notebook/tags?page=1&name=python- -# version: [ "3.9.13", "3.8.13" ] -# secrets: inherit -# with: -# dockerfile: ./kernels/python/Dockerfile -# context: ./kernels/python -# images: | -# ghcr.io/${{ github.repository }}/python -# tags: | -# type=ref,event=branch,prefix=${{ matrix.version }} -# type=ref,event=pr,prefix=${{ matrix.version }} -# type=sha,format=long,prefix=${{ matrix.version }} -# type=raw,value=latest,enable=${{ github.ref == format('refs/heads/{0}', 'main') }},prefix=${{ matrix.version }} -# build_args: | -# PYTHON_VERSION=${{ matrix.version }} -# platforms: "linux/amd64" diff --git a/.gitignore b/.gitignore new file mode 100644 index 00000000..46b133c9 --- /dev/null +++ b/.gitignore @@ -0,0 +1,46 @@ +# ignore these everywhere +.pythonrc +.Rprofile +apt-install +Aptfile +environment.txt +git_credential_helper.py +git-wrapper.sh +gpu.Aptfile +gpu.requirements.in +initial-condarc +ipython_config.py +secrets_helper.sh +requirements.in +requirements.R +run.sh + +# ...except for these places where we care about changes happening +# (NOTE: this is because the tasks should copy the files down into the build directories) +!scripts/apt-install +!scripts/secrets_helper.sh + +!python/base/Aptfile +!python/datascience/Aptfile +!python/noteable/Aptfile + +!python/base-gpu/gpu.Aptfile +!python/base-gpu/environment.txt + +!python/base-gpu/initial-condarc + +!python/base/requirements.in 
+!python/datascience/requirements.in +!python/noteable/requirements.in + +!python/run.sh +!python/base-gpu/run.sh +!r/run.sh + +!python/noteable/.pythonrc +!python/noteable/ipython_config.py +!python/noteable/git_credential_helper.py +!python/noteable/git-wrapper.sh + +!r/noteable/.Rprofile +!r/noteable/requirements.R diff --git a/Makefile b/Makefile deleted file mode 100644 index 25e95419..00000000 --- a/Makefile +++ /dev/null @@ -1,6 +0,0 @@ -copy-common-files: - cp requirements.txt kernels/python/ - cp ipython_config.py kernels/python/ - cp secrets_helper.py kernels/python/ - cp git_credential_helper.py kernels/python/ - cp git-wrapper.sh kernels/python/ diff --git a/R/Aptfile b/R/Aptfile new file mode 100755 index 00000000..9c0958e7 --- /dev/null +++ b/R/Aptfile @@ -0,0 +1,13 @@ +build-essential +ca-certificates +cmake +curl +bzip2 +gnupg2 +wget +g++ +git +jq +libudunits2-dev +procps +unixodbc-dev diff --git a/R/base/4.3.0/Dockerfile b/R/base/4.3.0/Dockerfile new file mode 100755 index 00000000..e2280f80 --- /dev/null +++ b/R/base/4.3.0/Dockerfile @@ -0,0 +1,78 @@ +# syntax = docker/dockerfile:1.2.1 +# --- +# Bare minimum R 4.3.x image with IRkernel installed +# - no R packages aside from builtins and IRkernel +# - no git, secrets, SQL, extensions, etc +# --- +ARG NBL_R_VERSION=4.3.0 +FROM r-base:${NBL_R_VERSION} +# Re-declare the build arg inside the stage: an ARG set before FROM is only visible to FROM itself, not to later RUN/LABEL instructions +ARG NBL_R_VERSION + +# User/group setup +USER root + +ENV NB_USER="noteable" \ + NB_UID=4004 \ + NB_GID=4004 + +RUN groupadd --gid 4004 noteable && \ + useradd --uid 4004 \ + --shell /bin/false \ + --create-home \ + --no-log-init \ + --gid noteable noteable \ + --home-dir /srv/noteable && \ + chown --recursive noteable:noteable /srv/noteable && \ + mkdir -p /etc/noteable && chown noteable:noteable /etc/noteable + +# Install tini to manage passing signals to the child kernel process +ENV TINI_VERSION v0.19.0 +ADD https://github.com/krallin/tini/releases/download/${TINI_VERSION}/tini /tini +RUN chmod +x /tini + +# Use micromamba and set up a virtual environment so we can 
install packages without root +COPY apt-install /usr/bin/ +# hadolint ignore=DL3045 +COPY Aptfile . +RUN /usr/bin/apt-install Aptfile + +SHELL ["/bin/bash", "-o", "pipefail", "-c"] +RUN wget -qO- https://micromamba.snakepit.net/api/micromamba/linux-64/latest | tar -xvj bin/micromamba && \ + ./bin/micromamba shell init -s bash -p ~/micromamba + +USER noteable +RUN micromamba create --name noteable-venv \ + -c conda-forge \ + -y \ + r="${NBL_R_VERSION}" +# make subsequent RUN commands use the virtualenv: +SHELL ["micromamba", "run", "-n", "noteable-venv", "/bin/bash", "-c"] + +# hadolint ignore=SC2239 +RUN R -e "install.packages('IRkernel', repos='http://cran.us.r-project.org')" + +COPY secrets_helper.sh /tmp/secrets_helper.sh +COPY run.sh /usr/local/bin + +ENV HOME="/srv/noteable" \ + XDG_CACHE_HOME="/srv/noteable/.cache/" \ + GOOGLE_APPLICATION_CREDENTIALS="/vault/secrets/gcp-credentials" + +WORKDIR /etc/noteable/project +EXPOSE 50001-50005 + +ENTRYPOINT ["/tini", "-g", "--"] +CMD ["run.sh"] + +ARG NBL_ARG_BUILD_TIMESTAMP="undefined" +ARG NBL_ARG_REVISION="undefined" +ARG NBL_ARG_BUILD_URL="undefined" +ARG NBL_ARG_VERSION="undefined" +LABEL org.opencontainers.image.created="${NBL_ARG_BUILD_TIMESTAMP}" \ + org.opencontainers.image.revision="${NBL_ARG_REVISION}" \ + org.opencontainers.image.source="https://github.com/noteable-io/polymorph" \ + org.opencontainers.image.title="noteable-R-${NBL_R_VERSION}" \ + org.opencontainers.image.url="${NBL_ARG_BUILD_URL}" \ + org.opencontainers.image.vendor="Noteable" \ + org.opencontainers.image.version="${NBL_ARG_VERSION}" diff --git a/R/noteable/.Rprofile b/R/noteable/.Rprofile new file mode 100755 index 00000000..f61e9eb1 --- /dev/null +++ b/R/noteable/.Rprofile @@ -0,0 +1,99 @@ +library(IRdisplay) +library(repr) +library(reticulate) + +prepare_dex_content <- function(df) { + #' + #' Create schema and data structure for data frame to be rendered by DEX + #' + + # create a schema for a dataframe, which DEX uses to determine 
column dtypes. + # R data frames don't have this functionality, so we have to use reticulate + # to call into the python pandas library + pandas <- import("pandas") + + # If df is a matrix, convert it to a data frame + if (is.matrix(df)) { + # In R, a matrix is a 2D vector, not a data frame. When reticulate converts an R matrix to Python, + # it becomes a numpy array, not a pandas DataFrame. The pandas function we're using requires a DataFrame, + # so we need to convert the matrix to a data frame first. + # + # We use stringsAsFactors = FALSE to prevent R from converting strings to factors. This is a feature of R + # that can be confusing for people used to Python, where there's no direct equivalent of factors. + # + # We use row.names = FALSE to prevent R from using the first column of the data as row names. This is + # because R matrices don't have row names in the same way that data frames do, and we want to keep the + # structure of the data consistent when we convert it to a DataFrame. + df <- as.data.frame(df, stringsAsFactors = FALSE, row.names = FALSE) + } + df_py <- r_to_py(df) + schema <- pandas$io$json$build_table_schema(df_py, index=FALSE) + + # vectorized format (list of lists) + #data = as.matrix.data.frame(t(df)) + # pandas df.to_dict("records") format + data = as.data.frame.list(df) + + list( + schema = schema, + data = data + ) +} + +prepare_dex_metadata <- function(df) { + #' + #' Create metadata (original row/column counts) for a data frame or matrix to be rendered by DEX + #' + list( + default_index_used=TRUE, + dataframe_info = list( + orig_num_rows = nrow(df), # R indexes from 1: dim(df)[0] is always integer(0), the row count is dim(df)[1] + orig_num_cols = ncol(df) # column count is dim(df)[2] + ) + ) +} + +repr_dex <- function(obj, ...) 
{ + if (is(obj, "data.frame") || is(obj, "matrix")) { + data <- prepare_dex_content(obj) + metadata <- prepare_dex_metadata(obj) + bundle_data <- list("application/vnd.dataresource+json"=data) + bundle_metadata <- list("application/vnd.dataresource+json"=metadata) + # we could use publish_mimebundle() to provide the data and metadata, + # but that doesn't return anything, which triggers repr_html/repr_markdown, etc + #publish_mimebundle(bundle_data, metadata=bundle_metadata) + return(data) + } else { + # if it's not a matrix or data.frame, return NULL to let other repr_* functions handle it. + return(NULL) + } +} + +enable_dex_formatter <- function() { + # Add custom display formatter to newly added mimetype + IRkernel:::replace_in_package('repr', 'mime2repr', c(repr::mime2repr, list(`application/vnd.dataresource+json` = repr_dex))) + + # Add dataresource mimetype to list of recognized mimetypes + mimetypes <- c(getOption('jupyter.display_mimetypes'), "application/vnd.dataresource+json") + options(jupyter.display_mimetypes = mimetypes) + + # Register custom formatter for matrix and data.frame + registerS3method("repr_html", "matrix", repr_dex) + registerS3method("repr_html", "data.frame", repr_dex) +} + +disable_dex_formatter <- function() { + # Remove custom display formatter: drop only our entry, because repr::mime2repr now refers to the already-patched list + IRkernel:::replace_in_package('repr', 'mime2repr', repr::mime2repr[setdiff(names(repr::mime2repr), "application/vnd.dataresource+json")]) + + # Remove dataresource mimetype from list of recognized mimetypes + mimetypes <- setdiff(getOption('jupyter.display_mimetypes'), "application/vnd.dataresource+json") + options(jupyter.display_mimetypes = mimetypes) + + # Reset the formatter for matrix and data.frame to the default + registerS3method("repr_html", "matrix", repr:::repr_html.matrix) + registerS3method("repr_html", "data.frame", repr:::repr_html.data.frame) +} + +# enable by default +enable_dex_formatter() diff --git a/R/noteable/4.3.0/Dockerfile b/R/noteable/4.3.0/Dockerfile new file mode 100644 index 00000000..46bcd085 --- /dev/null +++ 
b/R/noteable/4.3.0/Dockerfile @@ -0,0 +1,18 @@ +# syntax = docker/dockerfile:1.2.1 +# Noteable build: adds packages to enable Noteable-specific functionality: +# - DEX support (via .Rprofile) +ARG BASE_IMAGE +# hadolint ignore=DL3006 +FROM ${BASE_IMAGE} as base + +USER noteable + +# Install python to use with Reticulate +RUN micromamba install python=3.9 -y -c conda-forge + +# R package dependencies and py_install +COPY requirements.R /tmp/requirements.R +RUN R -e "source('/tmp/requirements.R')" + +# similarly, copy any R commands that need to run on startup +COPY .Rprofile /srv/noteable/.Rprofile diff --git a/R/noteable/requirements.R b/R/noteable/requirements.R new file mode 100755 index 00000000..417664fc --- /dev/null +++ b/R/noteable/requirements.R @@ -0,0 +1,5 @@ +install.packages('reticulate', repos='http://cran.us.r-project.org') +library(reticulate) +# Python packages to be used in R via reticulate +# ref: https://rstudio.github.io/reticulate/articles/python_packages.html +py_install('pandas==1.5.3', pip=TRUE) diff --git a/R/run.sh b/R/run.sh new file mode 100755 index 00000000..4bebb31c --- /dev/null +++ b/R/run.sh @@ -0,0 +1,23 @@ +#!/usr/bin/env bash +set -o pipefail +set -o nounset +set -o errexit + +echo "Local time: $(date)" + +set -x + +connection_file=/tmp/connection_file.json +cp /etc/noteable/connections/connection_file.json ${connection_file} + +# Inject Secrets into environment (see script docstring for more info) +# set +x to avoid echoing the Secrets in plaintext to logs +set +x +echo "Injecting Secrets into environment, echoing is turned off" +# shellcheck disable=SC1091 +source /tmp/secrets_helper.sh +echo "Done injecting Secrets, turning echoing back on" +set -x + +echo "Starting R kernel" +micromamba run -n noteable-venv R --slave -e "IRkernel::main()" --args ${connection_file} diff --git a/Taskfile.R.yaml b/Taskfile.R.yaml new file mode 100644 index 00000000..e9faa86b --- /dev/null +++ b/Taskfile.R.yaml @@ -0,0 +1,38 @@ +version: 3 + +# 
https://hub.docker.com/_/r-base/tags +vars: + NBL_R_VERSION: 4.3.0 + IDENTIFIER: base + +# NOTE: When using `deps: []`, variables are inherited from the current task, but when calling them +# directly in `cmds: []`, the variables have to be passed in explicitly. + +tasks: + core:build: + desc: Build the R 4.x image + cmds: + - >- + docker build R/{{.IDENTIFIER}}/{{.NBL_R_VERSION}} \ + --build-arg "NBL_R_VERSION={{.NBL_R_VERSION}}" \ + --build-arg "BASE_IMAGE={{.BASE_IMAGE}}" \ + --tag "local/kernel-r-{{.NBL_R_VERSION}}-{{.IDENTIFIER}}:dev" + + base:copy-files: + desc: Copy files from the R directory to the build directories + cmds: + - task copy-root-files LANGUAGE=R IDENTIFIER={{.IDENTIFIER}} NBL_LANGUAGE_VERSION={{.NBL_R_VERSION}} + - task copy-language-files LANGUAGE=R IDENTIFIER={{.IDENTIFIER}} NBL_LANGUAGE_VERSION={{.NBL_R_VERSION}} + + base:build: + desc: Build the R 4.x base image after copying required files + cmds: + - task r:base:copy-files IDENTIFIER=base NBL_LANGUAGE_VERSION={{.NBL_R_VERSION}} + - task r:core:build IDENTIFIER=base NBL_R_VERSION={{.NBL_R_VERSION}} + + noteable:build: + desc: Build the R 4.3.x image with data frame -> DEX support + cmds: + - cp R/noteable/.Rprofile R/noteable/{{.NBL_R_VERSION}}/.Rprofile + - cp R/noteable/requirements.R R/noteable/{{.NBL_R_VERSION}}/requirements.R + - task r:core:build IDENTIFIER=noteable NBL_R_VERSION={{.NBL_R_VERSION}} BASE_IMAGE=local/kernel-r-{{.NBL_R_VERSION}}-base:dev diff --git a/Taskfile.python.yaml b/Taskfile.python.yaml new file mode 100644 index 00000000..2613a8f5 --- /dev/null +++ b/Taskfile.python.yaml @@ -0,0 +1,296 @@ +version: 3 + +# https://hub.docker.com/_/python/tags?page=1&name=slim-bullseye +vars: + NBL_PYTHON_VERSION: 3.9 + IDENTIFIER: base + +# NOTE: When using `deps: []`, variables are inherited from the current task, but when calling them +# directly in `cmds: []`, the variables have to be passed in explicitly. 
+ +tasks: + core:build: + desc: Build the Python 3.x image + cmds: + - >- + docker build python/{{.IDENTIFIER}}/{{.NBL_PYTHON_VERSION}} \ + --build-arg "NBL_PYTHON_VERSION={{.NBL_PYTHON_VERSION}}" \ + --build-arg "BASE_IMAGE={{.BASE_IMAGE}}" \ + --target "{{.BUILD_TARGET}}" \ + --tag "local/kernel-python-{{.NBL_PYTHON_VERSION}}-{{.IDENTIFIER}}{{.TAG_SUFFIX}}:dev" + requires: + vars: [IDENTIFIER, NBL_PYTHON_VERSION] + vars: + BUILD_TARGET: '{{default "base" .BUILD_TARGET}}' + TAG_SUFFIX: '{{default "" .TAG_SUFFIX}}' + + base:copy-files: + desc: Copy files from the Python directory to the build directories + cmds: + - task copy-root-files LANGUAGE=python IDENTIFIER={{.IDENTIFIER}} NBL_LANGUAGE_VERSION={{.NBL_PYTHON_VERSION}} + - task copy-language-files LANGUAGE=python IDENTIFIER={{.IDENTIFIER}} NBL_LANGUAGE_VERSION={{.NBL_PYTHON_VERSION}} + + base:pyenv:install: + desc: Install the specified version of Python using pyenv + cmds: + - pyenv install -s {{.NBL_PYTHON_VERSION}} + + base:pyenv:virtualenv: + desc: Create a new virtual environment using pyenv + deps: [base:pyenv:install] + cmds: + - pyenv virtualenv {{.NBL_PYTHON_VERSION}} py{{.NBL_PYTHON_VERSION}} || true + + base:deps:install-pip-tools: + desc: Install Python dependencies + deps: [base:pyenv:virtualenv] + cmds: + - $(pyenv root)/versions/py{{.NBL_PYTHON_VERSION}}/bin/python -m pip install pip-tools==6.13.0 + + base:deps:copy-requirements: + desc: Copy identifier-level *requirements.in files to the version-level build directories + cmds: + - mkdir -p python/{{.IDENTIFIER}}/{{.NBL_PYTHON_VERSION}} + - cp python/{{.IDENTIFIER}}/{{.FILE_PREFIX}}requirements.in python/{{.IDENTIFIER}}/{{.NBL_PYTHON_VERSION}}/{{.FILE_PREFIX}}requirements.in + generates: + - python/{{.IDENTIFIER}}/{{.NBL_PYTHON_VERSION}}/{{.FILE_PREFIX}}requirements.in + + # Base image + base:lock-dependencies: + desc: Uses piptools compile to lock Python dependency versions for a specific build identifier, version, and file prefix (e.g. 
"gpu.") + deps: [base:deps:install-pip-tools] + cmds: + - task python:base:deps:copy-requirements FILE_PREFIX={{.FILE_PREFIX}} IDENTIFIER={{.IDENTIFIER}} NBL_PYTHON_VERSION={{.NBL_PYTHON_VERSION}} + # specifically looks at the python///.requirements.in file, not the generic requirements.in files + - $(pyenv root)/versions/py{{.NBL_PYTHON_VERSION}}/bin/python -m piptools compile --resolver=backtracking --output-file python/{{.IDENTIFIER}}/{{.NBL_PYTHON_VERSION}}/{{.FILE_PREFIX}}requirements.txt python/{{.IDENTIFIER}}/{{.NBL_PYTHON_VERSION}}/{{.NBL_PYTHON_VERSION}}.{{.FILE_PREFIX}}requirements.in + generates: + - python/{{.IDENTIFIER}}/{{.NBL_PYTHON_VERSION}}/{{.FILE_PREFIX}}requirements.txt + + base:build: + desc: Build the Python 3.x base image after copying required files + cmds: + - task python:base:copy-files IDENTIFIER=base NBL_PYTHON_VERSION={{.NBL_PYTHON_VERSION}} + - task python:core:build IDENTIFIER=base NBL_PYTHON_VERSION={{.NBL_PYTHON_VERSION}} + + # Base GPU image + base-gpu:lock-dependencies: + desc: Lock Python dependencies for GPU builds using pip-compile + deps: [base:lock-dependencies] + cmds: + - task python:base:lock-dependencies IDENTIFIER=base-gpu NBL_PYTHON_VERSION={{.NBL_PYTHON_VERSION}} + + base-gpu:build: + desc: Build the Python 3.x image with GPU support after copying required files + cmds: + - task python:base:copy-files IDENTIFIER=base-gpu NBL_PYTHON_VERSION={{.NBL_PYTHON_VERSION}} + # copy base-gpu specific files + - cp python/base-gpu/environment.txt python/base-gpu/{{.NBL_PYTHON_VERSION}}/environment.txt + - cp python/base-gpu/gpu.Aptfile python/base-gpu/{{.NBL_PYTHON_VERSION}}/gpu.Aptfile + - cp python/base-gpu/run.sh python/base-gpu/{{.NBL_PYTHON_VERSION}}/run.sh + - cp python/base-gpu/initial-condarc python/base-gpu/{{.NBL_PYTHON_VERSION}}/initial-condarc + # build off of the base image + - task python:core:build IDENTIFIER=base-gpu NBL_PYTHON_VERSION={{.NBL_PYTHON_VERSION}} 
BASE_IMAGE=local/kernel-python-{{.NBL_PYTHON_VERSION}}-base:dev + + # Datascience image + datascience:lock-dependencies: + desc: Lock Python dependencies for datascience builds using pip-compile + cmds: + - task python:base:lock-dependencies IDENTIFIER=datascience NBL_PYTHON_VERSION={{.NBL_PYTHON_VERSION}} + + datascience:build: + desc: Build the Python 3.x image with datascience packages extending the base image of the same version + cmds: + # ensure the base image is built first + - task python:base:build IDENTIFIER=datascience NBL_PYTHON_VERSION={{.NBL_PYTHON_VERSION}} + # build the datascience image + - task python:core:build IDENTIFIER=datascience NBL_PYTHON_VERSION={{.NBL_PYTHON_VERSION}} BASE_IMAGE=local/kernel-python-{{.NBL_PYTHON_VERSION}}-base:dev + + # Datascience GPU image + datascience-gpu:lock-dependencies: + desc: Lock Python dependencies for datascience builds using pip-compile + cmds: + - task python:base:lock-dependencies IDENTIFIER=datascience NBL_PYTHON_VERSION={{.NBL_PYTHON_VERSION}} FILE_PREFIX="gpu." 
+ + datascience-gpu:build: + desc: Build the Python 3.x image with datascience packages and GPU support + cmds: + # ensure the base-gpu image is built first + - task python:base-gpu:build IDENTIFIER=datascience NBL_PYTHON_VERSION={{.NBL_PYTHON_VERSION}} + # build the datascience-gpu image + - task python:core:build IDENTIFIER=datascience NBL_PYTHON_VERSION={{.NBL_PYTHON_VERSION}} BASE_IMAGE=local/kernel-python-{{.NBL_PYTHON_VERSION}}-base-gpu:dev BUILD_TARGET=gpu TAG_SUFFIX=-gpu + + # Noteable image + noteable:copy-files: + desc: Copy files from the `python/noteable` directory to the build directories + deps: [base:deps:copy-requirements] + cmds: + - cp python/noteable/Aptfile python/noteable/{{.NBL_PYTHON_VERSION}}/Aptfile + - cp python/noteable/git_credential_helper.py python/noteable/{{.NBL_PYTHON_VERSION}}/git_credential_helper.py + - cp python/noteable/git-wrapper.sh python/noteable/{{.NBL_PYTHON_VERSION}}/git-wrapper.sh + - cp python/noteable/ipython_config.py python/noteable/{{.NBL_PYTHON_VERSION}}/ipython_config.py + - cp python/noteable/.pythonrc python/noteable/{{.NBL_PYTHON_VERSION}}/.pythonrc + + noteable:lock-dependencies: + desc: Lock Python dependencies for Noteable builds using pip-compile + cmds: + - task python:base:lock-dependencies IDENTIFIER=noteable NBL_PYTHON_VERSION={{.NBL_PYTHON_VERSION}} + + noteable:build: + desc: Build the Python 3.x image with "Noteable feature"-related packages (SQL, git integration, DEX, etc) extending the datascience image of the same version + cmds: + # ensure the datascience image is built first + - task python:datascience:build IDENTIFIER=noteable-gpu NBL_PYTHON_VERSION={{.NBL_PYTHON_VERSION}} + # build the noteable image after copying over noteable-specific files + - task python:noteable:copy-files IDENTIFIER=noteable NBL_PYTHON_VERSION={{.NBL_PYTHON_VERSION}} + - task python:core:build IDENTIFIER=noteable NBL_PYTHON_VERSION={{.NBL_PYTHON_VERSION}} 
BASE_IMAGE=local/kernel-python-{{.NBL_PYTHON_VERSION}}-datascience:dev + + # Noteable GPU image + noteable-gpu:lock-dependencies: + desc: Lock Python dependencies for Noteable builds using pip-compile + cmds: + - task python:base:lock-dependencies IDENTIFIER=noteable NBL_PYTHON_VERSION={{.NBL_PYTHON_VERSION}} FILE_PREFIX="gpu." + + noteable-gpu:build: + desc: Build the Python 3.x image with "Noteable feature"-related packages (SQL, git integration, DEX, etc) and GPU support + cmds: + # ensure the datascience-gpu image is built first + - task python:datascience-gpu:build IDENTIFIER=noteable-gpu NBL_PYTHON_VERSION={{.NBL_PYTHON_VERSION}} + # build the noteable-gpu image after copying over noteable-specific files + - task python:noteable:copy-files IDENTIFIER=noteable NBL_PYTHON_VERSION={{.NBL_PYTHON_VERSION}} FILE_PREFIX="gpu." + - task python:core:build IDENTIFIER=noteable NBL_PYTHON_VERSION={{.NBL_PYTHON_VERSION}} BASE_IMAGE=local/kernel-python-{{.NBL_PYTHON_VERSION}}-datascience-gpu:dev BUILD_TARGET=gpu TAG_SUFFIX=-gpu + + # convenience functions for building multiple images in parallel + base:lock-all-dependencies: + desc: Lock Python dependencies for all Python 3.x builds using pip-compile + deps: + - task: base:lock-dependencies + vars: { NBL_PYTHON_VERSION: 3.9 } + - task: base:lock-dependencies + vars: { NBL_PYTHON_VERSION: 3.10 } + - task: base:lock-dependencies + vars: { NBL_PYTHON_VERSION: 3.11 } + + base:build-all: + desc: Build all Python base images + deps: + - task: base:build + vars: { NBL_PYTHON_VERSION: 3.9 } + - task: base:build + vars: { NBL_PYTHON_VERSION: 3.10 } + - task: base:build + vars: { NBL_PYTHON_VERSION: 3.11 } + + base-gpu:lock-all-dependencies: + desc: Lock Python dependencies for all Python 3.x GPU builds using pip-compile + deps: + - task: base-gpu:lock-dependencies + vars: { NBL_PYTHON_VERSION: 3.9 } + - task: base-gpu:lock-dependencies + vars: { NBL_PYTHON_VERSION: 3.10 } + - task: base-gpu:lock-dependencies + vars: { 
NBL_PYTHON_VERSION: 3.11 } + + base-gpu:build-all: + desc: Build all Python base-gpu images + deps: + - task: base-gpu:build + vars: { NBL_PYTHON_VERSION: 3.9 } + - task: base-gpu:build + vars: { NBL_PYTHON_VERSION: 3.10 } + - task: base-gpu:build + vars: { NBL_PYTHON_VERSION: 3.11 } + + # datascience convenience functions + datascience:lock-all-dependencies: + desc: Lock Python dependencies for all Python 3.x builds using pip-compile + deps: + - task: datascience:lock-dependencies + vars: { NBL_PYTHON_VERSION: 3.9 } + - task: datascience:lock-dependencies + vars: { NBL_PYTHON_VERSION: 3.10 } + - task: datascience:lock-dependencies + vars: { NBL_PYTHON_VERSION: 3.11 } + + datascience:build-all: + desc: Build all Python datascience images + deps: + - task: datascience:build + vars: { NBL_PYTHON_VERSION: 3.9 } + - task: datascience:build + vars: { NBL_PYTHON_VERSION: 3.10 } + - task: datascience:build + vars: { NBL_PYTHON_VERSION: 3.11 } + + datascience-gpu:lock-all-dependencies: + desc: Lock Python dependencies for all Python 3.x GPU builds using pip-compile + deps: + - task: datascience-gpu:lock-dependencies + vars: { NBL_PYTHON_VERSION: 3.9 } + - task: datascience-gpu:lock-dependencies + vars: { NBL_PYTHON_VERSION: 3.10 } + - task: datascience-gpu:lock-dependencies + vars: { NBL_PYTHON_VERSION: 3.11 } + + datascience-gpu:build-all: + desc: Build all Python datascience-gpu images + deps: + - task: datascience-gpu:build + vars: { NBL_PYTHON_VERSION: 3.9 } + - task: datascience-gpu:build + vars: { NBL_PYTHON_VERSION: 3.10 } + - task: datascience-gpu:build + vars: { NBL_PYTHON_VERSION: 3.11 } + + # Noteable convenience functions + noteable:lock-all-dependencies: + desc: Lock Python dependencies for all Python 3.x builds using pip-compile + deps: + - task: noteable:lock-dependencies + vars: { NBL_PYTHON_VERSION: 3.9 } + - task: noteable:lock-dependencies + vars: { NBL_PYTHON_VERSION: 3.10 } + + noteable:build-all: + desc: Build all Python noteable images + deps: + - 
task: noteable:build + vars: { NBL_PYTHON_VERSION: 3.9 } + - task: noteable:build + vars: { NBL_PYTHON_VERSION: 3.10 } + + noteable-gpu:lock-all-dependencies: + desc: Lock Python dependencies for all Python 3.x GPU builds using pip-compile + deps: + - task: noteable-gpu:lock-dependencies + vars: { NBL_PYTHON_VERSION: 3.9 } + - task: noteable-gpu:lock-dependencies + vars: { NBL_PYTHON_VERSION: 3.10 } + + noteable-gpu:build-all: + desc: Build all Python noteable-gpu images + deps: + - task: noteable-gpu:build + vars: { NBL_PYTHON_VERSION: 3.9 } + - task: noteable-gpu:build + vars: { NBL_PYTHON_VERSION: 3.10 } + + # convenience functions for building all images + lock-all-dependencies: + desc: Lock Python dependencies for all Python 3.x builds using pip-compile + deps: + - task: base:lock-all-dependencies + - task: base-gpu:lock-all-dependencies + - task: datascience:lock-all-dependencies + - task: datascience-gpu:lock-all-dependencies + - task: noteable:lock-all-dependencies + - task: noteable-gpu:lock-all-dependencies + + build-all: + desc: Build all Python images + deps: + - task: base:build-all + - task: base-gpu:build-all + - task: datascience:build-all + - task: datascience-gpu:build-all + - task: noteable:build-all + - task: noteable-gpu:build-all diff --git a/Taskfile.yaml b/Taskfile.yaml new file mode 100644 index 00000000..f86ac5ea --- /dev/null +++ b/Taskfile.yaml @@ -0,0 +1,25 @@ +version: 3 + +includes: + python: ./Taskfile.python.yaml + r: ./Taskfile.R.yaml + +tasks: + copy-root-files: + desc: Copy files from the root of the repository to the build directory of the given language, identifier and version + cmds: + - cp ./scripts/apt-install {{.LANGUAGE}}/{{.IDENTIFIER}}/{{.NBL_LANGUAGE_VERSION}}/apt-install + - cp ./scripts/secrets_helper.sh {{.LANGUAGE}}/{{.IDENTIFIER}}/{{.NBL_LANGUAGE_VERSION}}/secrets_helper.sh + generates: + - ./{{.LANGUAGE}}/{{.IDENTIFIER}}/{{.NBL_LANGUAGE_VERSION}}/apt-install + - ./{{.LANGUAGE}}/{{.IDENTIFIER}}/{{.NBL_LANGUAGE_VERSION}}/secrets_helper.sh + + copy-language-files: + desc: Copy 
files from the language-level directory to the build directories + deps: [copy-root-files] + cmds: + - cp {{.LANGUAGE}}/Aptfile {{.LANGUAGE}}/{{.IDENTIFIER}}/{{.NBL_LANGUAGE_VERSION}}/Aptfile + - cp {{.LANGUAGE}}/run.sh {{.LANGUAGE}}/{{.IDENTIFIER}}/{{.NBL_LANGUAGE_VERSION}}/run.sh + generates: + - ./{{.LANGUAGE}}/{{.IDENTIFIER}}/{{.NBL_LANGUAGE_VERSION}}/Aptfile + - ./{{.LANGUAGE}}/{{.IDENTIFIER}}/{{.NBL_LANGUAGE_VERSION}}/run.sh diff --git a/ipython_config.py b/ipython_config.py deleted file mode 100644 index fb66a186..00000000 --- a/ipython_config.py +++ /dev/null @@ -1,11 +0,0 @@ -c.InteractiveShellApp.extensions = [ - "noteable_magics", -] - -c.SqlMagic.feedback = False -c.SqlMagic.autopandas = True -c.NTBLMagic.project_dir = "/etc/noteable/project" -c.NoteableDataLoaderMagic.return_head = False -c.IPythonKernel._execute_sleep = 0.15 -# 10 minutes to support large files -c.NTBLMagic.planar_ally_default_timeout_seconds = 600 \ No newline at end of file diff --git a/kernels/python/.pythonrc b/kernels/python/.pythonrc deleted file mode 100644 index 6a1eac85..00000000 --- a/kernels/python/.pythonrc +++ /dev/null @@ -1,8 +0,0 @@ -import pandas as pd - -import dx - -dx.set_option("DISPLAY_MAX_ROWS", 50_000) -dx.set_option("DISPLAY_MAX_COLUMNS", 100) -dx.set_option("ENABLE_DATALINK", True) -dx.set_option("ENABLE_ASSIGNMENT", False) \ No newline at end of file diff --git a/kernels/python/Dockerfile b/kernels/python/Dockerfile deleted file mode 100644 index 6ee0fda6..00000000 --- a/kernels/python/Dockerfile +++ /dev/null @@ -1,102 +0,0 @@ -# syntax = docker/dockerfile:1.2.1 -ARG PYTHON_VERSION -FROM jupyter/datascience-notebook:python-${PYTHON_VERSION} - -USER root - -# datascience-notebook:python-3.9.13 includes psutil 5.9.2 with cooked C lib, but -# later pip installs end up installing 5.9.4, but for some -# reason 'import psutil' will end up getting the python 5.9.4 but the -# C lib from 5.9.2, and, unlike Smeagol, it hateses the precious. 
-RUN pip uninstall -y psutil - -# Set up log file for magics -RUN touch /var/log/noteable_magics.log && \ - chown 4004:4004 /var/log/noteable_magics.log - -# When image is run, run the code with the environment -# activated: -SHELL ["/bin/bash", "-c"] - -WORKDIR /tmp - -# hadolint ignore=DL3008,DL3015 -RUN apt-get update && \ - apt-get install -y jq procps git unixodbc-dev g++ \ - && rm -rf /var/lib/apt/lists/* - -ENV NB_USER="noteable" \ - NB_UID=4004 \ - NB_GID=4004 - -# Create the default unprivileged user -RUN groupadd --gid 4004 noteable && \ - useradd --uid 4004 --shell /bin/false --create-home --no-log-init --gid noteable noteable && \ - chown --recursive noteable:noteable /home/noteable - -RUN mkdir /etc/ipython && chown noteable:noteable /etc/ipython -RUN mkdir -p /etc/noteable && chown noteable:noteable /etc/noteable - -RUN chown noteable:noteable "${JULIA_PKGDIR}" && \ - chown noteable:noteable "${CONDA_DIR}" && \ - fix-permissions "${JULIA_PKGDIR}" && \ - fix-permissions "${CONDA_DIR}" - -# Run non-privileged user -USER noteable - -ENV PATH="/home/noteable/.local/bin:${PATH}" \ - HOME="/home/noteable" \ - XDG_CACHE_HOME="/home/noteable/.cache/" \ - GOOGLE_APPLICATION_CREDENTIALS="/vault/secrets/gcp-credentials" - -# hadolint ignore=DL3045 -COPY environment.txt ./ - -# hadolint ignore=SC2034 -RUN mamba install --file environment.txt - -# hadolint ignore=DL3045 -COPY requirements.txt ./ - -# hadolint ignore=SC1008,SC2155,DL3042,SC2102 -RUN pip install -I --no-cache-dir -r requirements.txt - -# Copy over any python commands that need to run on startup -# that aren't covered by IPython extensions -COPY .pythonrc /home/noteable/.pythonrc - -# Enable the widgets nbextension -# hadolint ignore=SC1008 -RUN jupyter nbextension enable --py --sys-prefix widgetsnbextension - -# Smoke test to ensure packages were installed properly -# hadolint ignore=SC1008 -RUN python -c "import noteable_magics, psutil" - -RUN git config --global user.name "Noteable Kernel" && \ - 
git config --global user.email "engineering@noteable.io" && \ - git config --global safe.directory /etc/noteable/project && \ - git config --global credential.helper /git_credential_helper.py && \ - git config --global credential.useHttpPath true - -# https://ipython.readthedocs.io/en/stable/config/intro.html#systemwide-configuration -COPY ipython_config.py /etc/ipython - -# Set standard working directory for noteable project -WORKDIR /etc/noteable/project - -# Add the entrypoint script to the $PATH -COPY run.sh /usr/local/bin -COPY secrets_helper.py /tmp/secrets_helper.py -COPY git_credential_helper.py /git_credential_helper.py -COPY git-wrapper.sh /usr/local/bin/git - -EXPOSE 50001-50005 - -# Use tini to manage passing signals to the child kernel process -# -g will ensure signals are passed to the entire child process *group*, -# not just the immediate child process (bash) -# https://github.com/krallin/tini#process-group-killing -ENTRYPOINT ["tini", "-g", "--"] -CMD ["run.sh"] diff --git a/kernels/python/environment.txt b/kernels/python/environment.txt deleted file mode 100644 index 20d65f2f..00000000 --- a/kernels/python/environment.txt +++ /dev/null @@ -1,7 +0,0 @@ -jupyter_client=7.3.* -ipython=8.0.* -vdom=0.6 -papermill=2.2.* -ipywidgets=7.6.* -plotly=4.14.3 -geopandas=0.11.0 \ No newline at end of file diff --git a/kernels/python/run.sh b/kernels/python/run.sh deleted file mode 100755 index 6fdb4434..00000000 --- a/kernels/python/run.sh +++ /dev/null @@ -1,50 +0,0 @@ -#!/usr/bin/env bash -set -o pipefail -set -o nounset -set -o errexit - -echo "Local time: $(date)" - -set -x - -connection_file=/tmp/connection_file.json - -cp /etc/noteable/connections/connection_file.json ${connection_file} - -kernel_name=$(jq -r .kernel_name /tmp/connection_file.json) - -# Inject Secrets into environment (see script docstring for more info) -# set +x to avoid echoing the Secrets in plaintext to logs -set +x -echo "Injecting Secrets into environment, echoing is turned off" 
-eval "$(python /tmp/secrets_helper.py)" -echo "Done injecting Secrets, turning echoing back on" -set -x - -case $kernel_name in - - python | python3) - echo "Starting Python kernel" - # https://docs.python.org/3/using/cmdline.html#envvar-PYTHONSTARTUP - export PYTHONSTARTUP=~/.pythonrc - exec python -m ipykernel_launcher -f ${connection_file} --debug - ;; - - ir) - echo "Starting R kernel" - exec R --slave -e "IRkernel::main()" --args ${connection_file} - ;; - - julia | julia-1.6) - echo "Starting Julia kernel" - # project path necessary to keep julia form using its defaults - exec julia -i --color=yes --project=/etc/noteable/project /opt/julia/packages/IJulia/e8kqU/src/kernel.jl ${connection_file} - ;; - - *) - echo "Unrecognized '$kernel_name' kernel, falling back to Python" - # https://docs.python.org/3/using/cmdline.html#envvar-PYTHONSTARTUP - export PYTHONSTARTUP=~/.pythonrc - exec python -m ipykernel_launcher -f ${connection_file} --debug - ;; -esac \ No newline at end of file diff --git a/python/Aptfile b/python/Aptfile new file mode 100755 index 00000000..268ae881 --- /dev/null +++ b/python/Aptfile @@ -0,0 +1,12 @@ +build-essential +ca-certificates +curl +bzip2 +gnupg2 +wget +g++ +git +jq +libudunits2-dev +procps +unixodbc-dev diff --git a/python/base-gpu/3.10/3.10.requirements.in b/python/base-gpu/3.10/3.10.requirements.in new file mode 100755 index 00000000..a67c8490 --- /dev/null +++ b/python/base-gpu/3.10/3.10.requirements.in @@ -0,0 +1,2 @@ +-r requirements.in +# any 3.10-specific requirements should be removed from the *requirements.in file referenced above, and instead added below diff --git a/python/base-gpu/3.10/Dockerfile b/python/base-gpu/3.10/Dockerfile new file mode 100755 index 00000000..8b13f5b9 --- /dev/null +++ b/python/base-gpu/3.10/Dockerfile @@ -0,0 +1,84 @@ +# syntax = docker/dockerfile:1.2.1 +ARG BASE_IMAGE +# hadolint ignore=DL3006 +FROM ${BASE_IMAGE} as base + +ARG NBL_PYTHON_VERSION=3.10 + +USER root + +#### Configurations for GPU 
drivers +# CUDA Compatibility matrix: https://docs.nvidia.com/cuda/cuda-toolkit-release-notes/index.html#cuda-major-component-versions +# AWS GPU AMI information: https://github.com/awslabs/amazon-eks-ami/blob/master/CHANGELOG.md +ENV CUDNN_VERSION="8.2.4.15-1" \ + CUDA_VERSION="11.4" \ + NVIDIA_LIBCUDNN_VERSION="8.2.4.15" \ + CUDA_DASH="11-4" \ + CONDA_DIR="/opt/conda" + +# install micromamba +COPY --chown=noteable:noteable initial-condarc "${CONDA_DIR}/.condarc" +WORKDIR /tmp +RUN set -x && \ + arch=$(uname -m) && \ + if [ "${arch}" = "x86_64" ]; then \ + # Should be simpler, see + arch="64"; \ + fi && \ + wget --progress=dot:giga -O /tmp/micromamba.tar.bz2 \ + "https://micromamba.snakepit.net/api/micromamba/linux-${arch}/latest" && \ + tar -xvjf /tmp/micromamba.tar.bz2 --strip-components=1 bin/micromamba && \ + rm /tmp/micromamba.tar.bz2 && \ + # Install the packages + ./micromamba install \ + --root-prefix="${CONDA_DIR}" \ + --prefix="${CONDA_DIR}" \ + --yes \ + python=$NBL_PYTHON_VERSION \ + 'mamba' && \ + rm micromamba + +ENV PATH="${CONDA_DIR}/bin:${PATH}" + +# Pin major.minor version of python +SHELL ["/bin/bash", "-o", "pipefail", "-c"] +RUN mamba list python | grep '^python ' | tr -s ' ' | cut -d ' ' -f 1,2 >> "${CONDA_DIR}/conda-meta/pinned" && \ + mamba clean --all -f -y + +WORKDIR /tmp + +RUN wget --progress=dot:giga https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.0-1_all.deb && \ + dpkg -i cuda-keyring_1.0-1_all.deb + +COPY Aptfile . +RUN /usr/bin/apt-install Aptfile +COPY gpu.Aptfile . 
+RUN /usr/bin/apt-install gpu.Aptfile + +ENV PATH="/srv/noteable/.local/bin:${PATH}" \ + HOME="/srv/noteable" \ + XDG_CACHE_HOME="/srv/noteable/.cache/" \ + GOOGLE_APPLICATION_CREDENTIALS="/vault/secrets/gcp-credentials" + +RUN chown -R noteable:noteable "${CONDA_DIR}" + +# Run non-privileged user +USER noteable + +COPY environment.txt /tmp/environment.txt +COPY requirements.txt /tmp/requirements.txt +RUN mamba env update -n base --file /tmp/environment.txt && \ + pip install -I --no-cache-dir -r /tmp/requirements.txt + +RUN mkdir -p $CONDA_DIR/lib/nvvm/libdevice && \ + cp $CONDA_DIR/lib/libdevice.10.bc $CONDA_DIR/lib/nvvm/libdevice/ + +# Set environment variables for Tensorflow / Cuda +ENV CUDNN_PATH=/opt/conda/lib/python${NBL_PYTHON_VERSION}/site-packages/nvidia/cudnn +# ENV doesn't seem to inherit from previous values when ran in same command +# We move to separate lines to ensure CUDNN_PATH is available +ENV LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$CONDA_DIR/lib/:$CUDNN_PATH/lib \ + XLA_FLAGS="--xla_gpu_cuda_data_dir=$CONDA_DIR/lib/" + +# Overwrite the base run.sh to include `mamba` usage +COPY run.sh /usr/local/bin diff --git a/python/base-gpu/3.10/requirements.txt b/python/base-gpu/3.10/requirements.txt new file mode 100644 index 00000000..5d568b24 --- /dev/null +++ b/python/base-gpu/3.10/requirements.txt @@ -0,0 +1,90 @@ +# +# This file is autogenerated by pip-compile with Python 3.10 +# by the following command: +# +# pip-compile --output-file=python/base-gpu/3.10/requirements.txt --resolver=backtracking python/base-gpu/3.10/3.10.requirements.in +# +asttokens==2.2.1 + # via stack-data +backcall==0.2.0 + # via ipython +comm==0.1.3 + # via ipykernel +cuda-python==12.2.0 + # via -r python/base-gpu/3.10/requirements.in +cython==0.29.36 + # via cuda-python +debugpy==1.6.7 + # via ipykernel +decorator==5.1.1 + # via ipython +executing==1.2.0 + # via stack-data +ipykernel==6.23.3 + # via -r python/base-gpu/3.10/requirements.in +ipython==8.14.0 + # via ipykernel +jedi==0.18.2 + # via 
ipython +jupyter-client==8.3.0 + # via ipykernel +jupyter-core==5.3.1 + # via + # ipykernel + # jupyter-client +matplotlib-inline==0.1.6 + # via + # ipykernel + # ipython +nest-asyncio==1.5.6 + # via ipykernel +nvidia-cublas-cu11==11.11.3.6 + # via nvidia-cudnn-cu11 +nvidia-cudnn-cu11==8.6.0.163 + # via -r python/base-gpu/3.10/requirements.in +packaging==23.1 + # via ipykernel +parso==0.8.3 + # via jedi +pexpect==4.8.0 + # via ipython +pickleshare==0.7.5 + # via ipython +platformdirs==3.8.1 + # via jupyter-core +prompt-toolkit==3.0.39 + # via ipython +psutil==5.9.5 + # via ipykernel +ptyprocess==0.7.0 + # via pexpect +pure-eval==0.2.2 + # via stack-data +pygments==2.15.1 + # via ipython +python-dateutil==2.8.2 + # via jupyter-client +pyzmq==25.1.0 + # via + # ipykernel + # jupyter-client +six==1.16.0 + # via + # asttokens + # python-dateutil +stack-data==0.6.2 + # via ipython +tornado==6.3.2 + # via + # ipykernel + # jupyter-client +traitlets==5.9.0 + # via + # comm + # ipykernel + # ipython + # jupyter-client + # jupyter-core + # matplotlib-inline +wcwidth==0.2.6 + # via prompt-toolkit diff --git a/python/base-gpu/3.11/3.11.requirements.in b/python/base-gpu/3.11/3.11.requirements.in new file mode 100755 index 00000000..702513a3 --- /dev/null +++ b/python/base-gpu/3.11/3.11.requirements.in @@ -0,0 +1,2 @@ +-r requirements.in +# any 3.11-specific requirements should be removed from the *requirements.in file referenced above, and instead added below diff --git a/python/base-gpu/3.11/Dockerfile b/python/base-gpu/3.11/Dockerfile new file mode 100755 index 00000000..2a6973e9 --- /dev/null +++ b/python/base-gpu/3.11/Dockerfile @@ -0,0 +1,84 @@ +# syntax = docker/dockerfile:1.2.1 +ARG BASE_IMAGE +# hadolint ignore=DL3006 +FROM ${BASE_IMAGE} as base + +ARG NBL_PYTHON_VERSION=3.11 + +USER root + +#### Configurations for GPU drivers +# CUDA Compatibility matrix: https://docs.nvidia.com/cuda/cuda-toolkit-release-notes/index.html#cuda-major-component-versions +# AWS GPU AMI 
information: https://github.com/awslabs/amazon-eks-ami/blob/master/CHANGELOG.md +ENV CUDNN_VERSION="8.2.4.15-1" \ + CUDA_VERSION="11.4" \ + NVIDIA_LIBCUDNN_VERSION="8.2.4.15" \ + CUDA_DASH="11-4" \ + CONDA_DIR="/opt/conda" + +# install micromamba +COPY --chown=noteable:noteable initial-condarc "${CONDA_DIR}/.condarc" +WORKDIR /tmp +RUN set -x && \ + arch=$(uname -m) && \ + if [ "${arch}" = "x86_64" ]; then \ + # Should be simpler, see + arch="64"; \ + fi && \ + wget --progress=dot:giga -O /tmp/micromamba.tar.bz2 \ + "https://micromamba.snakepit.net/api/micromamba/linux-${arch}/latest" && \ + tar -xvjf /tmp/micromamba.tar.bz2 --strip-components=1 bin/micromamba && \ + rm /tmp/micromamba.tar.bz2 && \ + # Install the packages + ./micromamba install \ + --root-prefix="${CONDA_DIR}" \ + --prefix="${CONDA_DIR}" \ + --yes \ + python=$NBL_PYTHON_VERSION \ + 'mamba' && \ + rm micromamba + +ENV PATH="${CONDA_DIR}/bin:${PATH}" + +# Pin major.minor version of python +SHELL ["/bin/bash", "-o", "pipefail", "-c"] +RUN mamba list python | grep '^python ' | tr -s ' ' | cut -d ' ' -f 1,2 >> "${CONDA_DIR}/conda-meta/pinned" && \ + mamba clean --all -f -y + +WORKDIR /tmp + +RUN wget --progress=dot:giga https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.0-1_all.deb && \ + dpkg -i cuda-keyring_1.0-1_all.deb + +COPY Aptfile . +RUN /usr/bin/apt-install Aptfile +COPY gpu.Aptfile . 
+RUN /usr/bin/apt-install gpu.Aptfile + +ENV PATH="/srv/noteable/.local/bin:${PATH}" \ + HOME="/srv/noteable" \ + XDG_CACHE_HOME="/srv/noteable/.cache/" \ + GOOGLE_APPLICATION_CREDENTIALS="/vault/secrets/gcp-credentials" + +RUN chown -R noteable:noteable "${CONDA_DIR}" + +# Run non-privileged user +USER noteable + +COPY environment.txt /tmp/environment.txt +COPY requirements.txt /tmp/requirements.txt +RUN mamba env update -n base --file /tmp/environment.txt && \ + pip install -I --no-cache-dir -r /tmp/requirements.txt + +RUN mkdir -p $CONDA_DIR/lib/nvvm/libdevice && \ + cp $CONDA_DIR/lib/libdevice.10.bc $CONDA_DIR/lib/nvvm/libdevice/ + +# Set environment variables for Tensorflow / Cuda +ENV CUDNN_PATH=/opt/conda/lib/python${NBL_PYTHON_VERSION}/site-packages/nvidia/cudnn +# ENV doesn't seem to inherit from previous values when ran in same command +# We move to separate lines to ensure CUDNN_PATH is available +ENV LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$CONDA_DIR/lib/:$CUDNN_PATH/lib \ + XLA_FLAGS="--xla_gpu_cuda_data_dir=$CONDA_DIR/lib/" + +# Overwrite the base run.sh to include `mamba` usage +COPY run.sh /usr/local/bin diff --git a/python/base-gpu/3.11/requirements.txt b/python/base-gpu/3.11/requirements.txt new file mode 100644 index 00000000..e8f72c59 --- /dev/null +++ b/python/base-gpu/3.11/requirements.txt @@ -0,0 +1,90 @@ +# +# This file is autogenerated by pip-compile with Python 3.11 +# by the following command: +# +# pip-compile --output-file=python/base-gpu/3.11/requirements.txt --resolver=backtracking python/base-gpu/3.11/3.11.requirements.in +# +asttokens==2.2.1 + # via stack-data +backcall==0.2.0 + # via ipython +comm==0.1.3 + # via ipykernel +cuda-python==12.2.0 + # via -r python/base-gpu/3.11/requirements.in +cython==0.29.36 + # via cuda-python +debugpy==1.6.7 + # via ipykernel +decorator==5.1.1 + # via ipython +executing==1.2.0 + # via stack-data +ipykernel==6.23.3 + # via -r python/base-gpu/3.11/requirements.in +ipython==8.14.0 + # via ipykernel +jedi==0.18.2 + # via 
ipython +jupyter-client==8.3.0 + # via ipykernel +jupyter-core==5.3.1 + # via + # ipykernel + # jupyter-client +matplotlib-inline==0.1.6 + # via + # ipykernel + # ipython +nest-asyncio==1.5.6 + # via ipykernel +nvidia-cublas-cu11==11.11.3.6 + # via nvidia-cudnn-cu11 +nvidia-cudnn-cu11==8.6.0.163 + # via -r python/base-gpu/3.11/requirements.in +packaging==23.1 + # via ipykernel +parso==0.8.3 + # via jedi +pexpect==4.8.0 + # via ipython +pickleshare==0.7.5 + # via ipython +platformdirs==3.8.1 + # via jupyter-core +prompt-toolkit==3.0.39 + # via ipython +psutil==5.9.5 + # via ipykernel +ptyprocess==0.7.0 + # via pexpect +pure-eval==0.2.2 + # via stack-data +pygments==2.15.1 + # via ipython +python-dateutil==2.8.2 + # via jupyter-client +pyzmq==25.1.0 + # via + # ipykernel + # jupyter-client +six==1.16.0 + # via + # asttokens + # python-dateutil +stack-data==0.6.2 + # via ipython +tornado==6.3.2 + # via + # ipykernel + # jupyter-client +traitlets==5.9.0 + # via + # comm + # ipykernel + # ipython + # jupyter-client + # jupyter-core + # matplotlib-inline +wcwidth==0.2.6 + # via prompt-toolkit diff --git a/python/base-gpu/3.9/3.9.requirements.in b/python/base-gpu/3.9/3.9.requirements.in new file mode 100755 index 00000000..9ec676dd --- /dev/null +++ b/python/base-gpu/3.9/3.9.requirements.in @@ -0,0 +1,2 @@ +-r requirements.in +# any 3.9-specific requirements should be removed from the *requirements.in file referenced above, and instead added below diff --git a/python/base-gpu/3.9/Dockerfile b/python/base-gpu/3.9/Dockerfile new file mode 100755 index 00000000..dc515c8d --- /dev/null +++ b/python/base-gpu/3.9/Dockerfile @@ -0,0 +1,84 @@ +# syntax = docker/dockerfile:1.2.1 +ARG BASE_IMAGE +# hadolint ignore=DL3006 +FROM ${BASE_IMAGE} as base + +ARG NBL_PYTHON_VERSION=3.9 + +USER root + +#### Configurations for GPU drivers +# CUDA Compatibility matrix: https://docs.nvidia.com/cuda/cuda-toolkit-release-notes/index.html#cuda-major-component-versions +# AWS GPU AMI information: 
https://github.com/awslabs/amazon-eks-ami/blob/master/CHANGELOG.md +ENV CUDNN_VERSION="8.2.4.15-1" \ + CUDA_VERSION="11.4" \ + NVIDIA_LIBCUDNN_VERSION="8.2.4.15" \ + CUDA_DASH="11-4" \ + CONDA_DIR="/opt/conda" + +# install micromamba +COPY --chown=noteable:noteable initial-condarc "${CONDA_DIR}/.condarc" +WORKDIR /tmp +RUN set -x && \ + arch=$(uname -m) && \ + if [ "${arch}" = "x86_64" ]; then \ + # Should be simpler, see + arch="64"; \ + fi && \ + wget --progress=dot:giga -O /tmp/micromamba.tar.bz2 \ + "https://micromamba.snakepit.net/api/micromamba/linux-${arch}/latest" && \ + tar -xvjf /tmp/micromamba.tar.bz2 --strip-components=1 bin/micromamba && \ + rm /tmp/micromamba.tar.bz2 && \ + # Install the packages + ./micromamba install \ + --root-prefix="${CONDA_DIR}" \ + --prefix="${CONDA_DIR}" \ + --yes \ + python=$NBL_PYTHON_VERSION \ + 'mamba' && \ + rm micromamba + +ENV PATH="${CONDA_DIR}/bin:${PATH}" + +# Pin major.minor version of python +SHELL ["/bin/bash", "-o", "pipefail", "-c"] +RUN mamba list python | grep '^python ' | tr -s ' ' | cut -d ' ' -f 1,2 >> "${CONDA_DIR}/conda-meta/pinned" && \ + mamba clean --all -f -y + +WORKDIR /tmp + +RUN wget --progress=dot:giga https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.0-1_all.deb && \ + dpkg -i cuda-keyring_1.0-1_all.deb + +COPY Aptfile . +RUN /usr/bin/apt-install Aptfile +COPY gpu.Aptfile . 
+RUN /usr/bin/apt-install gpu.Aptfile + +ENV PATH="/srv/noteable/.local/bin:${PATH}" \ + HOME="/srv/noteable" \ + XDG_CACHE_HOME="/srv/noteable/.cache/" \ + GOOGLE_APPLICATION_CREDENTIALS="/vault/secrets/gcp-credentials" + +RUN chown -R noteable:noteable "${CONDA_DIR}" + +# Run non-privileged user +USER noteable + +COPY environment.txt /tmp/environment.txt +COPY requirements.txt /tmp/requirements.txt +RUN mamba env update -n base --file /tmp/environment.txt && \ + pip install -I --no-cache-dir -r /tmp/requirements.txt + +RUN mkdir -p $CONDA_DIR/lib/nvvm/libdevice && \ + cp $CONDA_DIR/lib/libdevice.10.bc $CONDA_DIR/lib/nvvm/libdevice/ + +# Set environment variables for Tensorflow / Cuda +ENV CUDNN_PATH=/opt/conda/lib/python3.9/site-packages/nvidia/cudnn +# ENV doesn't seem to inherit from previous values when ran in same command +# We move to separate lines to ensure CUDNN_PATH is available +ENV LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$CONDA_DIR/lib/:$CUDNN_PATH/lib \ + XLA_FLAGS="--xla_gpu_cuda_data_dir=$CONDA_DIR/lib/" + +# Overwrite the base run.sh to include `mamba` usage +COPY run.sh /usr/local/bin diff --git a/python/base-gpu/3.9/requirements.txt b/python/base-gpu/3.9/requirements.txt new file mode 100644 index 00000000..06cf52fa --- /dev/null +++ b/python/base-gpu/3.9/requirements.txt @@ -0,0 +1,96 @@ +# +# This file is autogenerated by pip-compile with Python 3.9 +# by the following command: +# +# pip-compile --output-file=python/base-gpu/3.9/requirements.txt --resolver=backtracking python/base-gpu/3.9/3.9.requirements.in +# +asttokens==2.2.1 + # via stack-data +backcall==0.2.0 + # via ipython +comm==0.1.3 + # via ipykernel +cuda-python==12.2.0 + # via -r python/base-gpu/3.9/requirements.in +cython==0.29.35 + # via cuda-python +debugpy==1.6.7 + # via ipykernel +decorator==5.1.1 + # via ipython +executing==1.2.0 + # via stack-data +importlib-metadata==6.7.0 + # via jupyter-client +ipykernel==6.23.3 + # via -r python/base-gpu/3.9/requirements.in +ipython==8.14.0 + # 
via ipykernel +jedi==0.18.2 + # via ipython +jupyter-client==8.3.0 + # via ipykernel +jupyter-core==5.3.1 + # via + # ipykernel + # jupyter-client +matplotlib-inline==0.1.6 + # via + # ipykernel + # ipython +nest-asyncio==1.5.6 + # via ipykernel +nvidia-cublas-cu11==11.11.3.6 + # via nvidia-cudnn-cu11 +nvidia-cudnn-cu11==8.6.0.163 + # via -r python/base-gpu/3.9/requirements.in +packaging==23.1 + # via ipykernel +parso==0.8.3 + # via jedi +pexpect==4.8.0 + # via ipython +pickleshare==0.7.5 + # via ipython +platformdirs==3.8.0 + # via jupyter-core +prompt-toolkit==3.0.38 + # via ipython +psutil==5.9.5 + # via ipykernel +ptyprocess==0.7.0 + # via pexpect +pure-eval==0.2.2 + # via stack-data +pygments==2.15.1 + # via ipython +python-dateutil==2.8.2 + # via jupyter-client +pyzmq==25.1.0 + # via + # ipykernel + # jupyter-client +six==1.16.0 + # via + # asttokens + # python-dateutil +stack-data==0.6.2 + # via ipython +tornado==6.3.2 + # via + # ipykernel + # jupyter-client +traitlets==5.9.0 + # via + # comm + # ipykernel + # ipython + # jupyter-client + # jupyter-core + # matplotlib-inline +typing-extensions==4.7.0 + # via ipython +wcwidth==0.2.6 + # via prompt-toolkit +zipp==3.15.0 + # via importlib-metadata diff --git a/python/base-gpu/environment.txt b/python/base-gpu/environment.txt new file mode 100755 index 00000000..b2dcbc07 --- /dev/null +++ b/python/base-gpu/environment.txt @@ -0,0 +1,2 @@ +conda-forge::cudatoolkit=11.7.0 +nvidia::cuda-nvcc=11.3.58 \ No newline at end of file diff --git a/python/base-gpu/gpu.Aptfile b/python/base-gpu/gpu.Aptfile new file mode 100644 index 00000000..82b42707 --- /dev/null +++ b/python/base-gpu/gpu.Aptfile @@ -0,0 +1,3 @@ +cuda-toolkit +libcudnn8=8.5.0.96-1+cuda11.7 +libcudnn8-dev=8.5.0.96-1+cuda11.7 diff --git a/python/base-gpu/initial-condarc b/python/base-gpu/initial-condarc new file mode 100644 index 00000000..66ecf608 --- /dev/null +++ b/python/base-gpu/initial-condarc @@ -0,0 +1,6 @@ +# Conda configuration see 
https://conda.io/projects/conda/en/latest/configuration.html + +auto_update_conda: false +show_channel_urls: true +channels: + - conda-forge \ No newline at end of file diff --git a/python/base-gpu/requirements.in b/python/base-gpu/requirements.in new file mode 100644 index 00000000..1266793c --- /dev/null +++ b/python/base-gpu/requirements.in @@ -0,0 +1,3 @@ +ipykernel==6.23.3 +cuda-python==12.* +nvidia-cudnn-cu11==8.6.0.163 diff --git a/python/base-gpu/run.sh b/python/base-gpu/run.sh new file mode 100755 index 00000000..9a13bc0b --- /dev/null +++ b/python/base-gpu/run.sh @@ -0,0 +1,26 @@ +#!/usr/bin/env bash +set -o pipefail +set -o nounset +set -o errexit + +echo "Local time: $(date)" + +set -x + +connection_file=/tmp/connection_file.json +cp /etc/noteable/connections/connection_file.json ${connection_file} + +# Inject Secrets into environment (see script docstring for more info) +# set +x to avoid echoing the Secrets in plaintext to logs +set +x +echo "Injecting Secrets into environment, echoing is turned off" +# shellcheck disable=SC1091 +source /tmp/secrets_helper.sh +echo "Done injecting Secrets, turning echoing back on" +set -x + +echo "Starting Python kernel" +# https://docs.python.org/3/using/cmdline.html#envvar-PYTHONSTARTUP +export PYTHONSTARTUP=~/.pythonrc + +mamba run python -m ipykernel_launcher -f ${connection_file} --debug \ No newline at end of file diff --git a/python/base/3.10/3.10.requirements.in b/python/base/3.10/3.10.requirements.in new file mode 100755 index 00000000..a67c8490 --- /dev/null +++ b/python/base/3.10/3.10.requirements.in @@ -0,0 +1,2 @@ +-r requirements.in +# any 3.10-specific requirements should be removed from the *requirements.in file referenced above, and instead added below diff --git a/python/base/3.10/Dockerfile b/python/base/3.10/Dockerfile new file mode 100755 index 00000000..19a4fb8e --- /dev/null +++ b/python/base/3.10/Dockerfile @@ -0,0 +1,73 @@ +# syntax = docker/dockerfile:1.2.1 +# --- +# Bare minimum Python 3.x.x 
image with ipykernel installed +# - no Python packages aside from builtins and ipykernel +# - no git, secrets, SQL, extensions, etc +# --- +ARG NBL_PYTHON_VERSION=3.10 +FROM python:${NBL_PYTHON_VERSION}-slim-bullseye as base + +# User/group setup +USER root + +ENV NB_USER="noteable" \ + NB_UID=4004 \ + NB_GID=4004 + +RUN groupadd --gid 4004 noteable && \ + useradd --uid 4004 \ + --shell /bin/false \ + --create-home \ + --no-log-init \ + --gid noteable noteable \ + --home-dir /srv/noteable && \ + chown --recursive noteable:noteable /srv/noteable && \ + mkdir /opt/venv && chown noteable:noteable /opt/venv && \ + mkdir /etc/ipython && chown noteable:noteable /etc/ipython && \ + mkdir -p /etc/noteable && chown noteable:noteable /etc/noteable + +WORKDIR /tmp + +# Install tini to manage passing signals to the child kernel process +ENV TINI_VERSION v0.19.0 +ADD https://github.com/krallin/tini/releases/download/${TINI_VERSION}/tini /tini +RUN chmod +x /tini + +COPY apt-install /usr/bin/ +COPY Aptfile . 
+RUN /usr/bin/apt-install Aptfile + +USER noteable + +ENV VIRTUAL_ENV=/opt/venv +RUN python3 -m venv $VIRTUAL_ENV +ENV PATH="$VIRTUAL_ENV/bin:$PATH" + +COPY requirements.txt /tmp/base_requirements.txt +RUN pip install --no-cache-dir -r /tmp/base_requirements.txt + +COPY secrets_helper.sh /tmp/secrets_helper.sh +COPY run.sh /usr/local/bin +ARG NBL_PYTHON_VERSION +ENV HOME="/srv/noteable" \ + PYTHONPATH="/srv/noteable/.local/lib/python${NBL_PYTHON_VERSION}/site-packages:${PYTHONPATH}" \ + XDG_CACHE_HOME="/srv/noteable/.cache/" \ + GOOGLE_APPLICATION_CREDENTIALS="/vault/secrets/gcp-credentials" + +WORKDIR /etc/noteable/project +EXPOSE 50001-50005 + +ENTRYPOINT ["/tini", "-g", "--"] +CMD ["run.sh"] + +ARG NBL_ARG_BUILD_TIMESTAMP="undefined" +ARG NBL_ARG_REVISION="undefined" +ARG NBL_ARG_BUILD_URL="undefined" +ARG NBL_ARG_VERSION="undefined" +LABEL org.opencontainers.image.created="${NBL_ARG_BUILD_TIMESTAMP}" \ + org.opencontainers.image.revision="${NBL_ARG_REVISION}" \ + org.opencontainers.image.source="https://github.com/noteable-io/polymorph" \ + org.opencontainers.image.title="noteable-python-${NBL_PYTHON_VERSION}" \ + org.opencontainers.image.url="${NBL_ARG_BUILD_URL}" \ + org.opencontainers.image.vendor="Noteable" \ + org.opencontainers.image.version="${NBL_ARG_VERSION}" diff --git a/python/base/3.10/requirements.txt b/python/base/3.10/requirements.txt new file mode 100644 index 00000000..8b207d4a --- /dev/null +++ b/python/base/3.10/requirements.txt @@ -0,0 +1,82 @@ +# +# This file is autogenerated by pip-compile with Python 3.10 +# by the following command: +# +# pip-compile --output-file=python/base/3.10/requirements.txt --resolver=backtracking python/base/3.10/3.10.requirements.in +# +asttokens==2.2.1 + # via stack-data +backcall==0.2.0 + # via ipython +comm==0.1.3 + # via ipykernel +debugpy==1.6.7 + # via ipykernel +decorator==5.1.1 + # via ipython +executing==1.2.0 + # via stack-data +ipykernel==6.23.3 + # via -r python/base/3.10/requirements.in +ipython==8.14.0 + # via ipykernel 
+jedi==0.18.2 + # via ipython +jupyter-client==8.3.0 + # via ipykernel +jupyter-core==5.3.1 + # via + # ipykernel + # jupyter-client +matplotlib-inline==0.1.6 + # via + # ipykernel + # ipython +nest-asyncio==1.5.6 + # via ipykernel +packaging==23.1 + # via ipykernel +parso==0.8.3 + # via jedi +pexpect==4.8.0 + # via ipython +pickleshare==0.7.5 + # via ipython +platformdirs==3.8.0 + # via jupyter-core +prompt-toolkit==3.0.38 + # via ipython +psutil==5.9.5 + # via ipykernel +ptyprocess==0.7.0 + # via pexpect +pure-eval==0.2.2 + # via stack-data +pygments==2.15.1 + # via ipython +python-dateutil==2.8.2 + # via jupyter-client +pyzmq==25.1.0 + # via + # ipykernel + # jupyter-client +six==1.16.0 + # via + # asttokens + # python-dateutil +stack-data==0.6.2 + # via ipython +tornado==6.3.2 + # via + # ipykernel + # jupyter-client +traitlets==5.9.0 + # via + # comm + # ipykernel + # ipython + # jupyter-client + # jupyter-core + # matplotlib-inline +wcwidth==0.2.6 + # via prompt-toolkit diff --git a/python/base/3.11/3.11.requirements.in b/python/base/3.11/3.11.requirements.in new file mode 100755 index 00000000..702513a3 --- /dev/null +++ b/python/base/3.11/3.11.requirements.in @@ -0,0 +1,2 @@ +-r requirements.in +# any 3.11-specific requirements should be removed from the *requirements.in file referenced above, and instead added below diff --git a/python/base/3.11/Dockerfile b/python/base/3.11/Dockerfile new file mode 100755 index 00000000..e2640481 --- /dev/null +++ b/python/base/3.11/Dockerfile @@ -0,0 +1,73 @@ +# syntax = docker/dockerfile:1.2.1 +# --- +# Bare minimum Python 3.x.x image with ipykernel installed +# - no Python packages aside from builtins and ipykernel +# - no git, secrets, SQL, extensions, etc +# --- +ARG NBL_PYTHON_VERSION=3.11 +FROM python:${NBL_PYTHON_VERSION}-slim-bullseye as base + +# User/group setup +USER root + +ENV NB_USER="noteable" \ + NB_UID=4004 \ + NB_GID=4004 + +RUN groupadd --gid 4004 noteable && \ + useradd --uid 4004 \ + --shell 
/bin/false \ + --create-home \ + --no-log-init \ + --gid noteable noteable \ + --home-dir /srv/noteable && \ + chown --recursive noteable:noteable /srv/noteable && \ + mkdir /opt/venv && chown noteable:noteable /opt/venv && \ + mkdir /etc/ipython && chown noteable:noteable /etc/ipython && \ + mkdir -p /etc/noteable && chown noteable:noteable /etc/noteable + +WORKDIR /tmp + +# Install tini to manage passing signals to the child kernel process +ENV TINI_VERSION v0.19.0 +ADD https://github.com/krallin/tini/releases/download/${TINI_VERSION}/tini /tini +RUN chmod +x /tini + +COPY apt-install /usr/bin/ +COPY Aptfile . +RUN /usr/bin/apt-install Aptfile + +USER noteable + +ENV VIRTUAL_ENV=/opt/venv +RUN python3 -m venv $VIRTUAL_ENV +ENV PATH="$VIRTUAL_ENV/bin:$PATH" + +COPY requirements.txt /tmp/base_requirements.txt +RUN pip install --no-cache-dir -r /tmp/base_requirements.txt + +COPY secrets_helper.sh /tmp/secrets_helper.sh +COPY run.sh /usr/local/bin +ARG NBL_PYTHON_VERSION +ENV HOME="/srv/noteable" \ + PYTHONPATH="/srv/noteable/.local/lib/python${NBL_PYTHON_VERSION}/site-packages:${PYTHONPATH}" \ + XDG_CACHE_HOME="/srv/noteable/.cache/" \ + GOOGLE_APPLICATION_CREDENTIALS="/vault/secrets/gcp-credentials" + +WORKDIR /etc/noteable/project +EXPOSE 50001-50005 + +ENTRYPOINT ["/tini", "-g", "--"] +CMD ["run.sh"] + +ARG NBL_ARG_BUILD_TIMESTAMP="undefined" +ARG NBL_ARG_REVISION="undefined" +ARG NBL_ARG_BUILD_URL="undefined" +ARG NBL_ARG_VERSION="undefined" +LABEL org.opencontainers.image.created="${NBL_ARG_BUILD_TIMESTAMP}" \ + org.opencontainers.image.revision="${NBL_ARG_REVISION}" \ + org.opencontainers.image.source="https://github.com/noteable-io/polymorph" \ + org.opencontainers.image.title="noteable-python-${NBL_PYTHON_VERSION}" \ + org.opencontainers.image.url="${NBL_ARG_BUILD_URL}" \ + org.opencontainers.image.vendor="Noteable" \ + org.opencontainers.image.version="${NBL_ARG_VERSION}" diff --git a/python/base/3.11/requirements.txt b/python/base/3.11/requirements.txt new file mode 100644 
index 00000000..b127ceb3 --- /dev/null +++ b/python/base/3.11/requirements.txt @@ -0,0 +1,82 @@ +# +# This file is autogenerated by pip-compile with Python 3.11 +# by the following command: +# +# pip-compile --output-file=python/base/3.11/requirements.txt --resolver=backtracking python/base/3.11/3.11.requirements.in +# +asttokens==2.2.1 + # via stack-data +backcall==0.2.0 + # via ipython +comm==0.1.3 + # via ipykernel +debugpy==1.6.7 + # via ipykernel +decorator==5.1.1 + # via ipython +executing==1.2.0 + # via stack-data +ipykernel==6.23.3 + # via -r python/base/3.11/requirements.in +ipython==8.14.0 + # via ipykernel +jedi==0.18.2 + # via ipython +jupyter-client==8.3.0 + # via ipykernel +jupyter-core==5.3.1 + # via + # ipykernel + # jupyter-client +matplotlib-inline==0.1.6 + # via + # ipykernel + # ipython +nest-asyncio==1.5.6 + # via ipykernel +packaging==23.1 + # via ipykernel +parso==0.8.3 + # via jedi +pexpect==4.8.0 + # via ipython +pickleshare==0.7.5 + # via ipython +platformdirs==3.8.0 + # via jupyter-core +prompt-toolkit==3.0.38 + # via ipython +psutil==5.9.5 + # via ipykernel +ptyprocess==0.7.0 + # via pexpect +pure-eval==0.2.2 + # via stack-data +pygments==2.15.1 + # via ipython +python-dateutil==2.8.2 + # via jupyter-client +pyzmq==25.1.0 + # via + # ipykernel + # jupyter-client +six==1.16.0 + # via + # asttokens + # python-dateutil +stack-data==0.6.2 + # via ipython +tornado==6.3.2 + # via + # ipykernel + # jupyter-client +traitlets==5.9.0 + # via + # comm + # ipykernel + # ipython + # jupyter-client + # jupyter-core + # matplotlib-inline +wcwidth==0.2.6 + # via prompt-toolkit diff --git a/python/base/3.9/3.9.requirements.in b/python/base/3.9/3.9.requirements.in new file mode 100755 index 00000000..9ec676dd --- /dev/null +++ b/python/base/3.9/3.9.requirements.in @@ -0,0 +1,2 @@ +-r requirements.in +# any 3.9-specific requirements should be removed from the *requirements.in file referenced above, and instead added below diff --git 
a/python/base/3.9/Dockerfile b/python/base/3.9/Dockerfile new file mode 100755 index 00000000..d39a4b47 --- /dev/null +++ b/python/base/3.9/Dockerfile @@ -0,0 +1,73 @@ +# syntax = docker/dockerfile:1.2.1 +# --- +# Bare minimum Python 3.x.x image with ipykernel installed +# - no Python packages aside from builtins and ipykernel +# - no git, secrets, SQL, extensions, etc +# --- +ARG NBL_PYTHON_VERSION=3.9 +FROM python:${NBL_PYTHON_VERSION}-slim-bullseye as base + +# User/group setup +USER root + +ENV NB_USER="noteable" \ + NB_UID=4004 \ + NB_GID=4004 + +RUN groupadd --gid 4004 noteable && \ + useradd --uid 4004 \ + --shell /bin/false \ + --create-home \ + --no-log-init \ + --gid noteable noteable \ + --home-dir /srv/noteable && \ + chown --recursive noteable:noteable /srv/noteable && \ + mkdir /opt/venv && chown noteable:noteable /opt/venv && \ + mkdir /etc/ipython && chown noteable:noteable /etc/ipython && \ + mkdir -p /etc/noteable && chown noteable:noteable /etc/noteable + +WORKDIR /tmp + +# Install tini to manage passing signals to the child kernel process +ENV TINI_VERSION v0.19.0 +ADD https://github.com/krallin/tini/releases/download/${TINI_VERSION}/tini /tini +RUN chmod +x /tini + +COPY apt-install /usr/bin/ +COPY Aptfile . 
+RUN /usr/bin/apt-install Aptfile + +USER noteable + +ENV VIRTUAL_ENV=/opt/venv +RUN python3 -m venv $VIRTUAL_ENV +ENV PATH="$VIRTUAL_ENV/bin:$PATH" + +COPY requirements.txt /tmp/base_requirements.txt +RUN pip install --no-cache-dir -r /tmp/base_requirements.txt + +COPY secrets_helper.sh /tmp/secrets_helper.sh +COPY run.sh /usr/local/bin +ARG NBL_PYTHON_VERSION +ENV HOME="/srv/noteable" \ + PYTHONPATH="/srv/noteable/.local/lib/python${NBL_PYTHON_VERSION}/site-packages:${PYTHONPATH}" \ + XDG_CACHE_HOME="/srv/noteable/.cache/" \ + GOOGLE_APPLICATION_CREDENTIALS="/vault/secrets/gcp-credentials" + +WORKDIR /etc/noteable/project +EXPOSE 50001-50005 + +ENTRYPOINT ["/tini", "-g", "--"] +CMD ["run.sh"] + +ARG NBL_ARG_BUILD_TIMESTAMP="undefined" +ARG NBL_ARG_REVISION="undefined" +ARG NBL_ARG_BUILD_URL="undefined" +ARG NBL_ARG_VERSION="undefined" +LABEL org.opencontainers.image.created="${NBL_ARG_BUILD_TIMESTAMP}" \ + org.opencontainers.image.revision="${NBL_ARG_REVISION}" \ + org.opencontainers.image.source="https://github.com/noteable-io/polymorph" \ + org.opencontainers.image.title="noteable-python-${NBL_PYTHON_VERSION}" \ + org.opencontainers.image.url="${NBL_ARG_BUILD_URL}" \ + org.opencontainers.image.vendor="Noteable" \ + org.opencontainers.image.version="${NBL_ARG_VERSION}" diff --git a/python/base/3.9/requirements.txt b/python/base/3.9/requirements.txt new file mode 100644 index 00000000..0f7cbfff --- /dev/null +++ b/python/base/3.9/requirements.txt @@ -0,0 +1,88 @@ +# +# This file is autogenerated by pip-compile with Python 3.9 +# by the following command: +# +# pip-compile --output-file=python/base/3.9/requirements.txt --resolver=backtracking python/base/3.9/3.9.requirements.in +# +asttokens==2.2.1 + # via stack-data +backcall==0.2.0 + # via ipython +comm==0.1.3 + # via ipykernel +debugpy==1.6.7 + # via ipykernel +decorator==5.1.1 + # via ipython +executing==1.2.0 + # via stack-data +importlib-metadata==6.7.0 + # via jupyter-client +ipykernel==6.23.3 + # via -r 
python/base/3.9/requirements.in +ipython==8.14.0 + # via ipykernel +jedi==0.18.2 + # via ipython +jupyter-client==8.3.0 + # via ipykernel +jupyter-core==5.3.1 + # via + # ipykernel + # jupyter-client +matplotlib-inline==0.1.6 + # via + # ipykernel + # ipython +nest-asyncio==1.5.6 + # via ipykernel +packaging==23.1 + # via ipykernel +parso==0.8.3 + # via jedi +pexpect==4.8.0 + # via ipython +pickleshare==0.7.5 + # via ipython +platformdirs==3.8.0 + # via jupyter-core +prompt-toolkit==3.0.38 + # via ipython +psutil==5.9.5 + # via ipykernel +ptyprocess==0.7.0 + # via pexpect +pure-eval==0.2.2 + # via stack-data +pygments==2.15.1 + # via ipython +python-dateutil==2.8.2 + # via jupyter-client +pyzmq==25.1.0 + # via + # ipykernel + # jupyter-client +six==1.16.0 + # via + # asttokens + # python-dateutil +stack-data==0.6.2 + # via ipython +tornado==6.3.2 + # via + # ipykernel + # jupyter-client +traitlets==5.9.0 + # via + # comm + # ipykernel + # ipython + # jupyter-client + # jupyter-core + # matplotlib-inline +typing-extensions==4.6.3 + # via ipython +wcwidth==0.2.6 + # via prompt-toolkit +zipp==3.15.0 + # via importlib-metadata diff --git a/python/base/Aptfile b/python/base/Aptfile new file mode 100755 index 00000000..268ae881 --- /dev/null +++ b/python/base/Aptfile @@ -0,0 +1,12 @@ +build-essential +ca-certificates +curl +bzip2 +gnupg2 +wget +g++ +git +jq +libudunits2-dev +procps +unixodbc-dev diff --git a/python/base/requirements.in b/python/base/requirements.in new file mode 100644 index 00000000..0e401e65 --- /dev/null +++ b/python/base/requirements.in @@ -0,0 +1 @@ +ipykernel==6.23.3 \ No newline at end of file diff --git a/python/datascience/3.10/3.10.gpu.requirements.in b/python/datascience/3.10/3.10.gpu.requirements.in new file mode 100755 index 00000000..60ac2fd5 --- /dev/null +++ b/python/datascience/3.10/3.10.gpu.requirements.in @@ -0,0 +1,2 @@ +-r gpu.requirements.in +# any 3.10-specific requirements for the GPU build should be removed from the 
*requirements.in file referenced above, and instead added below diff --git a/python/datascience/3.10/3.10.requirements.in b/python/datascience/3.10/3.10.requirements.in new file mode 100755 index 00000000..a67c8490 --- /dev/null +++ b/python/datascience/3.10/3.10.requirements.in @@ -0,0 +1,2 @@ +-r requirements.in +# any 3.10-specific requirements should be removed from the *requirements.in file referenced above, and instead added below diff --git a/python/datascience/3.10/Dockerfile b/python/datascience/3.10/Dockerfile new file mode 100755 index 00000000..e00b7a20 --- /dev/null +++ b/python/datascience/3.10/Dockerfile @@ -0,0 +1,14 @@ +# syntax = docker/dockerfile:1.2.1 +# Datascience build: adds packages based on the NumFOCUS datascience stack +ARG BASE_IMAGE +# hadolint ignore=DL3006 +FROM ${BASE_IMAGE} AS base + +COPY requirements.txt /tmp/datascience_requirements.txt +RUN pip install --no-cache-dir -r /tmp/datascience_requirements.txt + +# GPU variant: layers the GPU requirements on top of the `base` stage +FROM base AS gpu + +COPY gpu.requirements.txt /tmp/datascience_gpu_requirements.txt +RUN pip install --no-cache-dir -r /tmp/datascience_gpu_requirements.txt diff --git a/python/datascience/3.10/gpu.requirements.txt b/python/datascience/3.10/gpu.requirements.txt new file mode 100644 index 00000000..8bbc1118 --- /dev/null +++ b/python/datascience/3.10/gpu.requirements.txt @@ -0,0 +1,322 @@ +# +# This file is autogenerated by pip-compile with Python 3.10 +# by the following command: +# +# pip-compile --output-file=python/datascience/3.10/gpu.requirements.txt --resolver=backtracking python/datascience/3.10/3.10.gpu.requirements.in +# +absl-py==1.4.0 + # via + # tensorboard + # tensorflow +astunparse==1.6.3 + # via tensorflow +blis==0.7.9 + # via thinc +cachetools==5.3.1 + # via google-auth +catalogue==2.0.8 + # via + # spacy + # srsly + # thinc +certifi==2023.5.7 + # via requests +charset-normalizer==3.1.0 + # via requests +click==8.1.3 + # via typer +cmake==3.26.4 + # via triton +confection==0.1.0 + # via thinc +contourpy==1.1.0 
+ # via matplotlib +cycler==0.11.0 + # via matplotlib +cymem==2.0.7 + # via + # preshed + # spacy + # thinc +fastai==2.7.12 + # via -r python/datascience/3.10/gpu.requirements.in +fastcore==1.5.29 + # via + # fastai + # fastdownload +fastdownload==0.0.7 + # via fastai +fastprogress==1.0.3 + # via + # fastai + # fastdownload +filelock==3.12.2 + # via + # torch + # triton +flatbuffers==23.5.26 + # via tensorflow +fonttools==4.40.0 + # via matplotlib +gast==0.4.0 + # via tensorflow +google-auth==2.21.0 + # via + # google-auth-oauthlib + # tensorboard +google-auth-oauthlib==1.0.0 + # via tensorboard +google-pasta==0.2.0 + # via tensorflow +grpcio==1.56.0 + # via + # tensorboard + # tensorflow +h5py==3.9.0 + # via tensorflow +idna==3.4 + # via requests +jax==0.4.13 + # via tensorflow +jinja2==3.1.2 + # via + # spacy + # torch +joblib==1.3.1 + # via scikit-learn +keras==2.12.0 + # via tensorflow +kiwisolver==1.4.4 + # via matplotlib +langcodes==3.3.0 + # via spacy +libclang==16.0.0 + # via tensorflow +lit==16.0.6 + # via triton +markdown==3.4.3 + # via tensorboard +markupsafe==2.1.3 + # via + # jinja2 + # werkzeug +matplotlib==3.7.2 + # via fastai +ml-dtypes==0.2.0 + # via jax +mpmath==1.3.0 + # via sympy +murmurhash==1.0.9 + # via + # preshed + # spacy + # thinc +networkx==3.1 + # via torch +numpy==1.24.3 + # via + # blis + # contourpy + # h5py + # jax + # matplotlib + # ml-dtypes + # opt-einsum + # pandas + # scikit-learn + # scipy + # spacy + # tensorboard + # tensorflow + # thinc + # torchvision +nvidia-cublas-cu11==11.10.3.66 + # via + # nvidia-cudnn-cu11 + # nvidia-cusolver-cu11 + # torch +nvidia-cuda-cupti-cu11==11.7.101 + # via torch +nvidia-cuda-nvrtc-cu11==11.7.99 + # via torch +nvidia-cuda-runtime-cu11==11.7.99 + # via torch +nvidia-cudnn-cu11==8.5.0.96 + # via torch +nvidia-cufft-cu11==10.9.0.58 + # via torch +nvidia-curand-cu11==10.2.10.91 + # via torch +nvidia-cusolver-cu11==11.4.0.1 + # via torch +nvidia-cusparse-cu11==11.7.4.91 + # via torch 
+nvidia-nccl-cu11==2.14.3 + # via torch +nvidia-nvtx-cu11==11.7.91 + # via torch +oauthlib==3.2.2 + # via requests-oauthlib +opt-einsum==3.3.0 + # via + # jax + # tensorflow +packaging==23.1 + # via + # fastai + # fastcore + # matplotlib + # spacy + # tensorflow + # thinc +pandas==2.0.3 + # via fastai +pathy==0.10.2 + # via spacy +pillow==10.0.0 + # via + # fastai + # matplotlib + # torchvision +preshed==3.0.8 + # via + # spacy + # thinc +protobuf==4.23.3 + # via + # tensorboard + # tensorflow +pyasn1==0.5.0 + # via + # pyasn1-modules + # rsa +pyasn1-modules==0.3.0 + # via google-auth +pydantic==1.10.11 + # via + # confection + # spacy + # thinc +pyparsing==3.0.9 + # via matplotlib +python-dateutil==2.8.2 + # via + # matplotlib + # pandas +pytz==2023.3 + # via pandas +pyyaml==6.0 + # via fastai +requests==2.31.0 + # via + # fastai + # requests-oauthlib + # spacy + # tensorboard + # torchvision +requests-oauthlib==1.3.1 + # via google-auth-oauthlib +rsa==4.9 + # via google-auth +scikit-learn==1.3.0 + # via fastai +scipy==1.11.1 + # via + # fastai + # jax + # scikit-learn +six==1.16.0 + # via + # astunparse + # google-auth + # google-pasta + # python-dateutil + # tensorflow +smart-open==6.3.0 + # via + # pathy + # spacy +spacy==3.5.4 + # via fastai +spacy-legacy==3.0.12 + # via spacy +spacy-loggers==1.0.4 + # via spacy +srsly==2.4.6 + # via + # confection + # spacy + # thinc +sympy==1.12 + # via torch +tensorboard==2.12.3 + # via tensorflow +tensorboard-data-server==0.7.1 + # via tensorboard +tensorflow==2.12.1 + # via -r python/datascience/3.10/gpu.requirements.in +tensorflow-estimator==2.12.0 + # via tensorflow +tensorflow-io-gcs-filesystem==0.32.0 + # via tensorflow +termcolor==2.3.0 + # via tensorflow +thinc==8.1.10 + # via spacy +threadpoolctl==3.1.0 + # via scikit-learn +torch==2.0.1 + # via + # -r python/datascience/3.10/gpu.requirements.in + # fastai + # torchaudio + # torchvision + # triton +torchaudio==2.0.2 + # via -r 
python/datascience/3.10/gpu.requirements.in +torchvision==0.15.2 + # via + # -r python/datascience/3.10/gpu.requirements.in + # fastai +tqdm==4.65.0 + # via spacy +triton==2.0.0 + # via torch +typer==0.9.0 + # via + # pathy + # spacy +typing-extensions==4.5.0 + # via + # pydantic + # tensorflow + # torch + # typer +tzdata==2023.3 + # via pandas +urllib3==1.26.16 + # via + # google-auth + # requests +wasabi==1.1.2 + # via + # spacy + # thinc +werkzeug==2.3.6 + # via tensorboard +wheel==0.40.0 + # via + # astunparse + # nvidia-cublas-cu11 + # nvidia-cuda-cupti-cu11 + # nvidia-cuda-runtime-cu11 + # nvidia-curand-cu11 + # nvidia-cusparse-cu11 + # nvidia-nvtx-cu11 + # tensorboard +wrapt==1.14.1 + # via tensorflow + +# The following packages are considered to be unsafe in a requirements file: +# pip +# setuptools diff --git a/python/datascience/3.10/requirements.txt b/python/datascience/3.10/requirements.txt new file mode 100644 index 00000000..6b26d4b9 --- /dev/null +++ b/python/datascience/3.10/requirements.txt @@ -0,0 +1,64 @@ +# +# This file is autogenerated by pip-compile with Python 3.10 +# by the following command: +# +# pip-compile --output-file=python/datascience/3.10/requirements.txt --resolver=backtracking python/datascience/3.10/3.10.requirements.in +# +contourpy==1.1.0 + # via matplotlib +cycler==0.11.0 + # via matplotlib +fonttools==4.40.0 + # via matplotlib +joblib==1.3.0 + # via scikit-learn +kiwisolver==1.4.4 + # via matplotlib +matplotlib==3.7.0 + # via + # -r python/datascience/3.10/requirements.in + # seaborn +numpy==1.25.0 + # via + # contourpy + # matplotlib + # pandas + # scikit-learn + # scipy + # seaborn +packaging==23.1 + # via + # matplotlib + # plotly +pandas==2.0.2 + # via + # -r python/datascience/3.10/requirements.in + # seaborn +pillow==9.5.0 + # via matplotlib +plotly==5.15.0 + # via -r python/datascience/3.10/requirements.in +pyparsing==3.1.0 + # via matplotlib +python-dateutil==2.8.2 + # via + # matplotlib + # pandas +pytz==2023.3 + # 
via pandas +scikit-learn==1.2.2 + # via -r python/datascience/3.10/requirements.in +scipy==1.11.1 + # via + # -r python/datascience/3.10/requirements.in + # scikit-learn +seaborn==0.12.2 + # via -r python/datascience/3.10/requirements.in +six==1.16.0 + # via python-dateutil +tenacity==8.2.2 + # via plotly +threadpoolctl==3.1.0 + # via scikit-learn +tzdata==2023.3 + # via pandas diff --git a/python/datascience/3.11/3.11.gpu.requirements.in b/python/datascience/3.11/3.11.gpu.requirements.in new file mode 100755 index 00000000..beaa7301 --- /dev/null +++ b/python/datascience/3.11/3.11.gpu.requirements.in @@ -0,0 +1,2 @@ +-r gpu.requirements.in +# any 3.11-specific requirements for the GPU build should be removed from the *requirements.in file referenced above, and instead added below diff --git a/python/datascience/3.11/3.11.requirements.in b/python/datascience/3.11/3.11.requirements.in new file mode 100755 index 00000000..702513a3 --- /dev/null +++ b/python/datascience/3.11/3.11.requirements.in @@ -0,0 +1,2 @@ +-r requirements.in +# any 3.11-specific requirements should be removed from the *requirements.in file referenced above, and instead added below diff --git a/python/datascience/3.11/Dockerfile b/python/datascience/3.11/Dockerfile new file mode 100755 index 00000000..e00b7a20 --- /dev/null +++ b/python/datascience/3.11/Dockerfile @@ -0,0 +1,14 @@ +# syntax = docker/dockerfile:1.2.1 +# Datascience build: adds packages based on the NumFOCUS datascience stack +ARG BASE_IMAGE +# hadolint ignore=DL3006 +FROM ${BASE_IMAGE} AS base + +COPY requirements.txt /tmp/datascience_requirements.txt +RUN pip install --no-cache-dir -r /tmp/datascience_requirements.txt + +# GPU variant: layers the GPU requirements on top of the `base` stage +FROM base AS gpu + +COPY gpu.requirements.txt /tmp/datascience_gpu_requirements.txt +RUN pip install --no-cache-dir -r /tmp/datascience_gpu_requirements.txt diff --git a/python/datascience/3.11/gpu.requirements.txt b/python/datascience/3.11/gpu.requirements.txt new file mode 100644 index 00000000..c0bbb9d1 --- 
/dev/null +++ b/python/datascience/3.11/gpu.requirements.txt @@ -0,0 +1,322 @@ +# +# This file is autogenerated by pip-compile with Python 3.11 +# by the following command: +# +# pip-compile --output-file=python/datascience/3.11/gpu.requirements.txt --resolver=backtracking python/datascience/3.11/3.11.gpu.requirements.in +# +absl-py==1.4.0 + # via + # tensorboard + # tensorflow +astunparse==1.6.3 + # via tensorflow +blis==0.7.9 + # via thinc +cachetools==5.3.1 + # via google-auth +catalogue==2.0.8 + # via + # spacy + # srsly + # thinc +certifi==2023.5.7 + # via requests +charset-normalizer==3.2.0 + # via requests +click==8.1.4 + # via typer +cmake==3.26.4 + # via triton +confection==0.1.0 + # via thinc +contourpy==1.1.0 + # via matplotlib +cycler==0.11.0 + # via matplotlib +cymem==2.0.7 + # via + # preshed + # spacy + # thinc +fastai==2.7.12 + # via -r python/datascience/3.11/gpu.requirements.in +fastcore==1.5.29 + # via + # fastai + # fastdownload +fastdownload==0.0.7 + # via fastai +fastprogress==1.0.3 + # via + # fastai + # fastdownload +filelock==3.12.2 + # via + # torch + # triton +flatbuffers==23.5.26 + # via tensorflow +fonttools==4.40.0 + # via matplotlib +gast==0.4.0 + # via tensorflow +google-auth==2.21.0 + # via + # google-auth-oauthlib + # tensorboard +google-auth-oauthlib==1.0.0 + # via tensorboard +google-pasta==0.2.0 + # via tensorflow +grpcio==1.56.0 + # via + # tensorboard + # tensorflow +h5py==3.9.0 + # via tensorflow +idna==3.4 + # via requests +jax==0.4.13 + # via tensorflow +jinja2==3.1.2 + # via + # spacy + # torch +joblib==1.3.1 + # via scikit-learn +keras==2.12.0 + # via tensorflow +kiwisolver==1.4.4 + # via matplotlib +langcodes==3.3.0 + # via spacy +libclang==16.0.0 + # via tensorflow +lit==16.0.6 + # via triton +markdown==3.4.3 + # via tensorboard +markupsafe==2.1.3 + # via + # jinja2 + # werkzeug +matplotlib==3.7.2 + # via fastai +ml-dtypes==0.2.0 + # via jax +mpmath==1.3.0 + # via sympy +murmurhash==1.0.9 + # via + # preshed + # spacy + 
# thinc +networkx==3.1 + # via torch +numpy==1.24.3 + # via + # blis + # contourpy + # h5py + # jax + # matplotlib + # ml-dtypes + # opt-einsum + # pandas + # scikit-learn + # scipy + # spacy + # tensorboard + # tensorflow + # thinc + # torchvision +nvidia-cublas-cu11==11.10.3.66 + # via + # nvidia-cudnn-cu11 + # nvidia-cusolver-cu11 + # torch +nvidia-cuda-cupti-cu11==11.7.101 + # via torch +nvidia-cuda-nvrtc-cu11==11.7.99 + # via torch +nvidia-cuda-runtime-cu11==11.7.99 + # via torch +nvidia-cudnn-cu11==8.5.0.96 + # via torch +nvidia-cufft-cu11==10.9.0.58 + # via torch +nvidia-curand-cu11==10.2.10.91 + # via torch +nvidia-cusolver-cu11==11.4.0.1 + # via torch +nvidia-cusparse-cu11==11.7.4.91 + # via torch +nvidia-nccl-cu11==2.14.3 + # via torch +nvidia-nvtx-cu11==11.7.91 + # via torch +oauthlib==3.2.2 + # via requests-oauthlib +opt-einsum==3.3.0 + # via + # jax + # tensorflow +packaging==23.1 + # via + # fastai + # fastcore + # matplotlib + # spacy + # tensorflow + # thinc +pandas==2.0.3 + # via fastai +pathy==0.10.2 + # via spacy +pillow==10.0.0 + # via + # fastai + # matplotlib + # torchvision +preshed==3.0.8 + # via + # spacy + # thinc +protobuf==4.23.4 + # via + # tensorboard + # tensorflow +pyasn1==0.5.0 + # via + # pyasn1-modules + # rsa +pyasn1-modules==0.3.0 + # via google-auth +pydantic==1.10.11 + # via + # confection + # spacy + # thinc +pyparsing==3.0.9 + # via matplotlib +python-dateutil==2.8.2 + # via + # matplotlib + # pandas +pytz==2023.3 + # via pandas +pyyaml==6.0 + # via fastai +requests==2.31.0 + # via + # fastai + # requests-oauthlib + # spacy + # tensorboard + # torchvision +requests-oauthlib==1.3.1 + # via google-auth-oauthlib +rsa==4.9 + # via google-auth +scikit-learn==1.3.0 + # via fastai +scipy==1.11.1 + # via + # fastai + # jax + # scikit-learn +six==1.16.0 + # via + # astunparse + # google-auth + # google-pasta + # python-dateutil + # tensorflow +smart-open==6.3.0 + # via + # pathy + # spacy +spacy==3.6.0 + # via fastai 
+spacy-legacy==3.0.12 + # via spacy +spacy-loggers==1.0.4 + # via spacy +srsly==2.4.6 + # via + # confection + # spacy + # thinc +sympy==1.12 + # via torch +tensorboard==2.12.3 + # via tensorflow +tensorboard-data-server==0.7.1 + # via tensorboard +tensorflow==2.12.1 + # via -r python/datascience/3.11/gpu.requirements.in +tensorflow-estimator==2.12.0 + # via tensorflow +tensorflow-io-gcs-filesystem==0.32.0 + # via tensorflow +termcolor==2.3.0 + # via tensorflow +thinc==8.1.10 + # via spacy +threadpoolctl==3.1.0 + # via scikit-learn +torch==2.0.1 + # via + # -r python/datascience/3.11/gpu.requirements.in + # fastai + # torchaudio + # torchvision + # triton +torchaudio==2.0.2 + # via -r python/datascience/3.11/gpu.requirements.in +torchvision==0.15.2 + # via + # -r python/datascience/3.11/gpu.requirements.in + # fastai +tqdm==4.65.0 + # via spacy +triton==2.0.0 + # via torch +typer==0.9.0 + # via + # pathy + # spacy +typing-extensions==4.5.0 + # via + # pydantic + # tensorflow + # torch + # typer +tzdata==2023.3 + # via pandas +urllib3==1.26.16 + # via + # google-auth + # requests +wasabi==1.1.2 + # via + # spacy + # thinc +werkzeug==2.3.6 + # via tensorboard +wheel==0.40.0 + # via + # astunparse + # nvidia-cublas-cu11 + # nvidia-cuda-cupti-cu11 + # nvidia-cuda-runtime-cu11 + # nvidia-curand-cu11 + # nvidia-cusparse-cu11 + # nvidia-nvtx-cu11 + # tensorboard +wrapt==1.14.1 + # via tensorflow + +# The following packages are considered to be unsafe in a requirements file: +# pip +# setuptools diff --git a/python/datascience/3.11/requirements.txt b/python/datascience/3.11/requirements.txt new file mode 100644 index 00000000..f1354ab9 --- /dev/null +++ b/python/datascience/3.11/requirements.txt @@ -0,0 +1,64 @@ +# +# This file is autogenerated by pip-compile with Python 3.11 +# by the following command: +# +# pip-compile --output-file=python/datascience/3.11/requirements.txt --resolver=backtracking python/datascience/3.11/3.11.requirements.in +# +contourpy==1.1.0 + # via 
matplotlib +cycler==0.11.0 + # via matplotlib +fonttools==4.40.0 + # via matplotlib +joblib==1.3.0 + # via scikit-learn +kiwisolver==1.4.4 + # via matplotlib +matplotlib==3.7.0 + # via + # -r python/datascience/3.11/requirements.in + # seaborn +numpy==1.25.0 + # via + # contourpy + # matplotlib + # pandas + # scikit-learn + # scipy + # seaborn +packaging==23.1 + # via + # matplotlib + # plotly +pandas==2.0.2 + # via + # -r python/datascience/3.11/requirements.in + # seaborn +pillow==9.5.0 + # via matplotlib +plotly==5.15.0 + # via -r python/datascience/3.11/requirements.in +pyparsing==3.1.0 + # via matplotlib +python-dateutil==2.8.2 + # via + # matplotlib + # pandas +pytz==2023.3 + # via pandas +scikit-learn==1.2.2 + # via -r python/datascience/3.11/requirements.in +scipy==1.11.1 + # via + # -r python/datascience/3.11/requirements.in + # scikit-learn +seaborn==0.12.2 + # via -r python/datascience/3.11/requirements.in +six==1.16.0 + # via python-dateutil +tenacity==8.2.2 + # via plotly +threadpoolctl==3.1.0 + # via scikit-learn +tzdata==2023.3 + # via pandas diff --git a/python/datascience/3.9/3.9.gpu.requirements.in b/python/datascience/3.9/3.9.gpu.requirements.in new file mode 100755 index 00000000..8ccced15 --- /dev/null +++ b/python/datascience/3.9/3.9.gpu.requirements.in @@ -0,0 +1,2 @@ +-r gpu.requirements.in +# any 3.9-specific requirements for the GPU build should be removed from the *requirements.in file referenced above, and instead added below diff --git a/python/datascience/3.9/3.9.requirements.in b/python/datascience/3.9/3.9.requirements.in new file mode 100755 index 00000000..9ec676dd --- /dev/null +++ b/python/datascience/3.9/3.9.requirements.in @@ -0,0 +1,2 @@ +-r requirements.in +# any 3.9-specific requirements should be removed from the *requirements.in file referenced above, and instead added below diff --git a/python/datascience/3.9/Dockerfile b/python/datascience/3.9/Dockerfile new file mode 100755 index 00000000..e00b7a20 --- /dev/null +++ 
b/python/datascience/3.9/Dockerfile @@ -0,0 +1,14 @@ +# syntax = docker/dockerfile:1.2.1 +# Datascience build: adds packages based on the NumFOCUS datascience stack +ARG BASE_IMAGE +# hadolint ignore=DL3006 +FROM ${BASE_IMAGE} AS base + +COPY requirements.txt /tmp/datascience_requirements.txt +RUN pip install --no-cache-dir -r /tmp/datascience_requirements.txt + +# GPU variant: layers the GPU requirements on top of the `base` stage +FROM base AS gpu + +COPY gpu.requirements.txt /tmp/datascience_gpu_requirements.txt +RUN pip install --no-cache-dir -r /tmp/datascience_gpu_requirements.txt diff --git a/python/datascience/3.9/gpu.requirements.txt b/python/datascience/3.9/gpu.requirements.txt new file mode 100644 index 00000000..312363d4 --- /dev/null +++ b/python/datascience/3.9/gpu.requirements.txt @@ -0,0 +1,332 @@ +# +# This file is autogenerated by pip-compile with Python 3.9 +# by the following command: +# +# pip-compile --output-file=python/datascience/3.9/gpu.requirements.txt --resolver=backtracking python/datascience/3.9/3.9.gpu.requirements.in +# +absl-py==1.4.0 + # via + # tensorboard + # tensorflow +astunparse==1.6.3 + # via tensorflow +blis==0.7.9 + # via thinc +cachetools==5.3.1 + # via google-auth +catalogue==2.0.8 + # via + # spacy + # srsly + # thinc +certifi==2023.5.7 + # via requests +charset-normalizer==3.1.0 + # via requests +click==8.1.3 + # via typer +cmake==3.26.4 + # via triton +confection==0.1.0 + # via thinc +contourpy==1.1.0 + # via matplotlib +cycler==0.11.0 + # via matplotlib +cymem==2.0.7 + # via + # preshed + # spacy + # thinc +fastai==2.7.12 + # via -r python/datascience/3.9/gpu.requirements.in +fastcore==1.5.29 + # via + # fastai + # fastdownload +fastdownload==0.0.7 + # via fastai +fastprogress==1.0.3 + # via + # fastai + # fastdownload +filelock==3.12.2 + # via + # torch + # triton +flatbuffers==23.5.26 + # via tensorflow +fonttools==4.40.0 + # via matplotlib +gast==0.4.0 + # via tensorflow +google-auth==2.21.0 + # via + # google-auth-oauthlib + # tensorboard +google-auth-oauthlib==1.0.0 + # via 
tensorboard +google-pasta==0.2.0 + # via tensorflow +grpcio==1.56.0 + # via + # tensorboard + # tensorflow +h5py==3.9.0 + # via tensorflow +idna==3.4 + # via requests +importlib-metadata==6.7.0 + # via + # jax + # markdown +importlib-resources==5.12.0 + # via matplotlib +jax==0.4.13 + # via tensorflow +jinja2==3.1.2 + # via + # spacy + # torch +joblib==1.3.1 + # via scikit-learn +keras==2.12.0 + # via tensorflow +kiwisolver==1.4.4 + # via matplotlib +langcodes==3.3.0 + # via spacy +libclang==16.0.0 + # via tensorflow +lit==16.0.6 + # via triton +markdown==3.4.3 + # via tensorboard +markupsafe==2.1.3 + # via + # jinja2 + # werkzeug +matplotlib==3.7.2 + # via fastai +ml-dtypes==0.2.0 + # via jax +mpmath==1.3.0 + # via sympy +murmurhash==1.0.9 + # via + # preshed + # spacy + # thinc +networkx==3.1 + # via torch +numpy==1.24.3 + # via + # blis + # contourpy + # h5py + # jax + # matplotlib + # ml-dtypes + # opt-einsum + # pandas + # scikit-learn + # scipy + # spacy + # tensorboard + # tensorflow + # thinc + # torchvision +nvidia-cublas-cu11==11.10.3.66 + # via + # nvidia-cudnn-cu11 + # nvidia-cusolver-cu11 + # torch +nvidia-cuda-cupti-cu11==11.7.101 + # via torch +nvidia-cuda-nvrtc-cu11==11.7.99 + # via torch +nvidia-cuda-runtime-cu11==11.7.99 + # via torch +nvidia-cudnn-cu11==8.5.0.96 + # via torch +nvidia-cufft-cu11==10.9.0.58 + # via torch +nvidia-curand-cu11==10.2.10.91 + # via torch +nvidia-cusolver-cu11==11.4.0.1 + # via torch +nvidia-cusparse-cu11==11.7.4.91 + # via torch +nvidia-nccl-cu11==2.14.3 + # via torch +nvidia-nvtx-cu11==11.7.91 + # via torch +oauthlib==3.2.2 + # via requests-oauthlib +opt-einsum==3.3.0 + # via + # jax + # tensorflow +packaging==23.1 + # via + # fastai + # fastcore + # matplotlib + # spacy + # tensorflow + # thinc +pandas==2.0.3 + # via fastai +pathy==0.10.2 + # via spacy +pillow==10.0.0 + # via + # fastai + # matplotlib + # torchvision +preshed==3.0.8 + # via + # spacy + # thinc +protobuf==4.23.3 + # via + # tensorboard + # tensorflow 
+pyasn1==0.5.0 + # via + # pyasn1-modules + # rsa +pyasn1-modules==0.3.0 + # via google-auth +pydantic==1.10.11 + # via + # confection + # spacy + # thinc +pyparsing==3.0.9 + # via matplotlib +python-dateutil==2.8.2 + # via + # matplotlib + # pandas +pytz==2023.3 + # via pandas +pyyaml==6.0 + # via fastai +requests==2.31.0 + # via + # fastai + # requests-oauthlib + # spacy + # tensorboard + # torchvision +requests-oauthlib==1.3.1 + # via google-auth-oauthlib +rsa==4.9 + # via google-auth +scikit-learn==1.3.0 + # via fastai +scipy==1.11.1 + # via + # fastai + # jax + # scikit-learn +six==1.16.0 + # via + # astunparse + # google-auth + # google-pasta + # python-dateutil + # tensorflow +smart-open==6.3.0 + # via + # pathy + # spacy +spacy==3.5.4 + # via fastai +spacy-legacy==3.0.12 + # via spacy +spacy-loggers==1.0.4 + # via spacy +srsly==2.4.6 + # via + # confection + # spacy + # thinc +sympy==1.12 + # via torch +tensorboard==2.12.3 + # via tensorflow +tensorboard-data-server==0.7.1 + # via tensorboard +tensorflow==2.12.1 + # via -r python/datascience/3.9/gpu.requirements.in +tensorflow-estimator==2.12.0 + # via tensorflow +tensorflow-io-gcs-filesystem==0.32.0 + # via tensorflow +termcolor==2.3.0 + # via tensorflow +thinc==8.1.10 + # via spacy +threadpoolctl==3.1.0 + # via scikit-learn +torch==2.0.1 + # via + # -r python/datascience/3.9/gpu.requirements.in + # fastai + # torchaudio + # torchvision + # triton +torchaudio==2.0.2 + # via -r python/datascience/3.9/gpu.requirements.in +torchvision==0.15.2 + # via + # -r python/datascience/3.9/gpu.requirements.in + # fastai +tqdm==4.65.0 + # via spacy +triton==2.0.0 + # via torch +typer==0.9.0 + # via + # pathy + # spacy +typing-extensions==4.5.0 + # via + # pydantic + # tensorflow + # torch + # typer +tzdata==2023.3 + # via pandas +urllib3==1.26.16 + # via + # google-auth + # requests +wasabi==1.1.2 + # via + # spacy + # thinc +werkzeug==2.3.6 + # via tensorboard +wheel==0.40.0 + # via + # astunparse + # 
nvidia-cublas-cu11 + # nvidia-cuda-cupti-cu11 + # nvidia-cuda-runtime-cu11 + # nvidia-curand-cu11 + # nvidia-cusparse-cu11 + # nvidia-nvtx-cu11 + # tensorboard +wrapt==1.14.1 + # via tensorflow +zipp==3.15.0 + # via + # importlib-metadata + # importlib-resources + +# The following packages are considered to be unsafe in a requirements file: +# pip +# setuptools diff --git a/python/datascience/3.9/requirements.txt b/python/datascience/3.9/requirements.txt new file mode 100644 index 00000000..8db595a6 --- /dev/null +++ b/python/datascience/3.9/requirements.txt @@ -0,0 +1,68 @@ +# +# This file is autogenerated by pip-compile with Python 3.9 +# by the following command: +# +# pip-compile --output-file=python/datascience/3.9/requirements.txt --resolver=backtracking python/datascience/3.9/3.9.requirements.in +# +contourpy==1.1.0 + # via matplotlib +cycler==0.11.0 + # via matplotlib +fonttools==4.40.0 + # via matplotlib +importlib-resources==5.12.0 + # via matplotlib +joblib==1.3.0 + # via scikit-learn +kiwisolver==1.4.4 + # via matplotlib +matplotlib==3.7.0 + # via + # -r python/datascience/3.9/requirements.in + # seaborn +numpy==1.25.0 + # via + # contourpy + # matplotlib + # pandas + # scikit-learn + # scipy + # seaborn +packaging==23.1 + # via + # matplotlib + # plotly +pandas==2.0.2 + # via + # -r python/datascience/3.9/requirements.in + # seaborn +pillow==9.5.0 + # via matplotlib +plotly==5.15.0 + # via -r python/datascience/3.9/requirements.in +pyparsing==3.1.0 + # via matplotlib +python-dateutil==2.8.2 + # via + # matplotlib + # pandas +pytz==2023.3 + # via pandas +scikit-learn==1.2.2 + # via -r python/datascience/3.9/requirements.in +scipy==1.11.1 + # via + # -r python/datascience/3.9/requirements.in + # scikit-learn +seaborn==0.12.2 + # via -r python/datascience/3.9/requirements.in +six==1.16.0 + # via python-dateutil +tenacity==8.2.2 + # via plotly +threadpoolctl==3.1.0 + # via scikit-learn +tzdata==2023.3 + # via pandas +zipp==3.15.0 + # via 
importlib-resources diff --git a/python/datascience/gpu.requirements.in b/python/datascience/gpu.requirements.in new file mode 100755 index 00000000..f7d25981 --- /dev/null +++ b/python/datascience/gpu.requirements.in @@ -0,0 +1,5 @@ +tensorflow==2.12.* +torch==2.* +torchvision==0.15.* +torchaudio==2.* +fastai \ No newline at end of file diff --git a/python/datascience/requirements.in b/python/datascience/requirements.in new file mode 100755 index 00000000..0f987e98 --- /dev/null +++ b/python/datascience/requirements.in @@ -0,0 +1,6 @@ +matplotlib==3.7.0 +pandas==2.0.2 +plotly==5.15.0 +scikit-learn==1.2.2 +scipy==1.11.1 +seaborn==0.12.2 diff --git a/python/noteable/.pythonrc b/python/noteable/.pythonrc new file mode 100755 index 00000000..9e65e9b7 --- /dev/null +++ b/python/noteable/.pythonrc @@ -0,0 +1,33 @@ +import dx +import numpy as np  # not referenced below; presumably pre-imported for the user's session (confirm) +import pandas as pd  # not referenced below; presumably pre-imported for the user's session (confirm) +from matplotlib import font_manager, rcParams + +dx.set_display_mode("simple") +dx.set_option("DISPLAY_MAX_ROWS", 50_000) +dx.set_option("DISPLAY_MAX_COLUMNS", 100) +dx.set_option("ENABLE_DATALINK", True) + +# Register the Noto Sans CJK font collection with matplotlib and list its faces first +# in font.sans-serif, for Chinese, Japanese, Korean character support +font_manager.fontManager.addfont("/usr/share/fonts/opentype/noto/NotoSansCJK-Regular.ttc") +rcParams["font.family"] = "sans-serif" +rcParams["font.sans-serif"] = [ + "Noto Sans CJK HK", + "Noto Sans CJK JP", + "Noto Sans CJK KR", + "Noto Sans CJK SC", + "Noto Sans CJK TC", + # matplotlib defaults, kept as fallbacks after the CJK faces + "DejaVu Sans", + "Bitstream Vera Sans", + "Computer Modern Sans Serif", + "Lucida Grande", + "Verdana", + "Geneva", + "Lucid", + "Arial", + "Helvetica", + "Avant Garde", + "sans-serif", +] diff --git a/python/noteable/3.10/3.10.gpu.requirements.in b/python/noteable/3.10/3.10.gpu.requirements.in new file mode 100755 index 00000000..60ac2fd5 --- /dev/null +++ b/python/noteable/3.10/3.10.gpu.requirements.in @@ -0,0 +1,2 @@ +-r gpu.requirements.in +# any 3.10-specific requirements for the GPU build should be 
removed from the *requirements.in file referenced above, and instead added below diff --git a/python/noteable/3.10/3.10.requirements.in b/python/noteable/3.10/3.10.requirements.in new file mode 100755 index 00000000..a67c8490 --- /dev/null +++ b/python/noteable/3.10/3.10.requirements.in @@ -0,0 +1,2 @@ +-r requirements.in +# any 3.10-specific requirements should be removed from the *requirements.in file referenced above, and instead added below diff --git a/python/noteable/3.10/Dockerfile b/python/noteable/3.10/Dockerfile new file mode 100755 index 00000000..e8e10e9f --- /dev/null +++ b/python/noteable/3.10/Dockerfile @@ -0,0 +1,45 @@ +# syntax = docker/dockerfile:1.2.1 +# Noteable build: adds packages to enable Noteable-specific functionality: +# - DEX support (via dx) +# - git integration (via gitpython, nbdime) +# - variable explorer, form cells (via sidecar_comms) +ARG BASE_IMAGE +# hadolint ignore=DL3006 +FROM ${BASE_IMAGE} AS base + +ARG NBL_PYTHON_VERSION=3.10 + +USER root + +# Prerequisites to install msodbcsql18 driver for MS SQL Server connectivity. +# From https://github.com/MicrosoftDocs/sql-docs/issues/8821#issuecomment-1569088666 +SHELL ["/bin/bash", "-o", "pipefail", "-c"] +# hadolint ignore=DL3008,DL3009,DL3015 +RUN apt-get update -y && \ + apt-get install gnupg2 lsb-release -y && \ + curl -fsSL https://packages.microsoft.com/keys/microsoft.asc | gpg --dearmor > /usr/share/keyrings/microsoft-archive-keyring.gpg && \ + echo "deb [arch=amd64,armhf,arm64 signed-by=/usr/share/keyrings/microsoft-archive-keyring.gpg] https://packages.microsoft.com/debian/$(lsb_release -rs)/prod bullseye main" > /etc/apt/sources.list.d/mssql-release.list + +# hadolint ignore=DL3045 +COPY Aptfile . 
+RUN /usr/bin/apt-install Aptfile + +COPY requirements.txt /tmp/noteable_requirements.txt +RUN pip install --no-cache-dir -r /tmp/noteable_requirements.txt + +# Smoke test to ensure packages were installed properly +# hadolint ignore=SC1008,DL3059 +RUN python -c "import dx, noteable, psutil, sidecar_comms" + +COPY .pythonrc /srv/noteable/. +COPY ipython_config.py /etc/ipython +COPY git_credential_helper.py /git_credential_helper.py +COPY git-wrapper.sh /usr/local/bin/git + +USER noteable + +# GPU variant: layers the GPU requirements on top of the `base` stage +FROM base AS gpu + +COPY gpu.requirements.txt /tmp/noteable_gpu_requirements.txt +RUN pip install --no-cache-dir -r /tmp/noteable_gpu_requirements.txt diff --git a/python/noteable/3.10/gpu.requirements.txt b/python/noteable/3.10/gpu.requirements.txt new file mode 100644 index 00000000..a272867a --- /dev/null +++ b/python/noteable/3.10/gpu.requirements.txt @@ -0,0 +1,6 @@ +# +# This file is autogenerated by pip-compile with Python 3.10 +# by the following command: +# +# pip-compile --output-file=python/noteable/3.10/gpu.requirements.txt --resolver=backtracking python/noteable/3.10/3.10.gpu.requirements.in +# diff --git a/python/noteable/3.10/requirements.txt b/python/noteable/3.10/requirements.txt new file mode 100644 index 00000000..78d3fd00 --- /dev/null +++ b/python/noteable/3.10/requirements.txt @@ -0,0 +1,995 @@ +# +# This file is autogenerated by pip-compile with Python 3.10 +# by the following command: +# +# pip-compile --output-file=python/noteable/3.10/requirements.txt --resolver=backtracking python/noteable/3.10/3.10.requirements.in +# +adagio==0.2.4 + # via + # fugue + # qpd +aiofiles==22.1.0 + # via ypy-websocket +aiosqlite==0.19.0 + # via ypy-websocket +alembic==1.11.1 + # via + # dagster + # databricks-sql-connector +ansiwrap==0.8.4 + # via papermill +antlr4-python3-runtime==4.11.1 + # via + # fugue-sql-antlr + # qpd +anyio==3.6.2 + # via + # httpcore + # jupyter-server +appdirs==1.4.4 + # via fs +argon2-cffi==21.3.0 + # via + # jupyter-server + # nbclassic + # 
notebook +argon2-cffi-bindings==21.2.0 + # via argon2-cffi +arrow==1.2.3 + # via + # isoduration + # jinja2-time +asn1crypto==1.5.1 + # via + # oscrypto + # scramp + # snowflake-connector-python +asttokens==2.2.1 + # via stack-data +attrs==23.1.0 + # via jsonschema +babel==2.12.1 + # via jupyterlab-server +backcall==0.2.0 + # via ipython +backoff==2.2.1 + # via + # noteable + # noteable-origami +beautifulsoup4==4.12.2 + # via + # nbconvert + # redshift-connector +binaryornot==0.4.4 + # via cookiecutter +bitmath==1.3.3.1 + # via noteable-origami +bleach==6.0.0 + # via nbconvert +boto3==1.26.136 + # via + # pyathena + # redshift-connector +botocore==1.29.136 + # via + # boto3 + # pyathena + # redshift-connector + # s3transfer +build==0.10.0 + # via singlestoredb +cachetools==5.3.0 + # via google-auth +certifi==2023.5.7 + # via + # httpcore + # httpx + # kubernetes + # requests + # snowflake-connector-python +cffi==1.15.1 + # via + # argon2-cffi-bindings + # cryptography + # snowflake-connector-python +chardet==5.1.0 + # via binaryornot +charset-normalizer==2.1.1 + # via + # requests + # snowflake-connector-python +click==8.1.3 + # via + # cookiecutter + # dagster + # flytekit + # noteable + # papermill + # pyiceberg +clickhouse-driver==0.2.6 + # via clickhouse-sqlalchemy +clickhouse-sqlalchemy==0.2.3 + # via noteable +cloudpickle==2.2.0 + # via + # -r python/noteable/3.10/requirements.in + # flytekit + # papermill-origami +colorama==0.4.6 + # via nbdime +coloredlogs==14.0 + # via dagster +comm==0.1.3 + # via ipykernel +cookiecutter==2.1.1 + # via flytekit +croniter==1.3.14 + # via + # dagster + # flytekit +cryptography==40.0.2 + # via + # jwt + # pyopenssl + # secretstorage + # snowflake-connector-python +dagster==1.0.15 + # via dagstermill +dagstermill==0.16.15 + # via -r python/noteable/3.10/requirements.in +databricks-sql-connector==2.5.2 + # via sqlalchemy-databricks +dataclasses-json==0.5.7 + # via flytekit +debugpy==1.6.7 + # via ipykernel +decorator==5.1.1 + # 
via + # ipython + # retry +defusedxml==0.7.1 + # via nbconvert +deprecated==1.2.13 + # via flytekit +diskcache==5.6.1 + # via flytekit +docker==6.1.2 + # via flytekit +docker-image-py==0.1.12 + # via flytekit +docstring-parser==0.15 + # via + # dagster + # flytekit +duckdb==0.8.0 + # via duckdb-engine +duckdb-engine==0.6.9 + # via + # dx + # noteable +dx @ git+https://www.github.com/noteable-io/dx.git@a9a4310e51236833e818f3d81edc06a2c2fa3b0e + # via -r python/noteable/3.10/requirements.in +entrypoints==0.4 + # via papermill +et-xmlfile==1.1.0 + # via openpyxl +exceptiongroup==1.1.1 + # via dx +executing==1.2.0 + # via stack-data +fastjsonschema==2.16.3 + # via nbformat +filelock==3.12.0 + # via snowflake-connector-python +flyteidl==1.2.10 + # via flytekit +flytekit==1.2.11 + # via flytekitplugins-papermill +flytekitplugins-papermill==1.2.4 + # via -r python/noteable/3.10/requirements.in +fqdn==1.5.1 + # via jsonschema +fs==2.4.16 + # via triad +fsspec==2023.1.0 + # via + # pyathena + # pyiceberg +fugue==0.8.3 + # via + # -r python/noteable/3.10/requirements.in + # fugue-jupyter +fugue-jupyter==0.2.3 + # via -r python/noteable/3.10/requirements.in +fugue-sql-antlr==0.1.6 + # via fugue +future==0.18.3 + # via + # pyhive + # sqlalchemy-bigquery +gitdb==4.0.10 + # via gitpython +gitpython==3.1.31 + # via + # flytekit + # nbdime + # noteable +google-api-core[grpc]==2.11.0 + # via + # google-cloud-bigquery + # google-cloud-bigquery-storage + # google-cloud-core + # sqlalchemy-bigquery +google-auth==2.18.1 + # via + # google-api-core + # google-cloud-core + # kubernetes + # sqlalchemy-bigquery +google-cloud-bigquery==3.10.0 + # via sqlalchemy-bigquery +google-cloud-bigquery-storage==2.6.3 + # via + # noteable + # sqlalchemy-bigquery +google-cloud-core==2.3.2 + # via google-cloud-bigquery +google-crc32c==1.5.0 + # via google-resumable-media +google-resumable-media==2.5.0 + # via google-cloud-bigquery +googleapis-common-protos==1.59.0 + # via + # flyteidl + # 
google-api-core + # grpcio-status +graphviz==0.20.1 + # via -r python/noteable/3.10/requirements.in +greenlet==1.1.3.post0 + # via + # noteable + # sqlalchemy +grpcio==1.47.5 + # via + # dagster + # flytekit + # google-api-core + # google-cloud-bigquery + # grpcio-health-checking + # grpcio-status +grpcio-health-checking==1.43.0 + # via dagster +grpcio-status==1.47.5 + # via + # flytekit + # google-api-core +h11==0.14.0 + # via httpcore +h2==4.1.0 + # via httpx +hpack==4.0.0 + # via h2 +httpcore==0.16.3 + # via httpx +httpx[http2]==0.23.3 + # via + # noteable + # noteable-origami +humanfriendly==10.0 + # via coloredlogs +hyperframe==6.0.1 + # via h2 +idna==3.4 + # via + # anyio + # jsonschema + # requests + # rfc3986 + # snowflake-connector-python +importlib-metadata==6.6.0 + # via + # flytekit + # keyring +ipykernel==6.23.1 + # via + # dagstermill + # flytekitplugins-papermill + # ipython + # nbclassic + # notebook + # sidecar-comms +ipykernel-logging @ git+https://www.github.com/noteable-io/ipykernel-logging.git@b5ae61c0c99ba8e1a6740fd24b123bc580efe53a + # via -r python/noteable/3.10/requirements.in +ipython[kernel]==8.13.2 + # via + # dx + # fugue-jupyter + # ipykernel + # jupyterlab + # noteable + # scrapbook +ipython-genutils==0.2.0 + # via + # dagstermill + # nbclassic + # notebook +isoduration==20.11.0 + # via jsonschema +jaraco-classes==3.2.3 + # via keyring +jedi==0.18.2 + # via ipython +jeepney==0.8.0 + # via + # keyring + # secretstorage +jinja2==3.1.2 + # via + # cookiecutter + # dagster + # fugue + # fugue-sql-antlr + # jinja2-time + # jinjasql + # jupyter-server + # jupyterlab + # jupyterlab-server + # nbclassic + # nbconvert + # nbdime + # notebook +jinja2-time==0.2.0 + # via cookiecutter +jinjasql @ git+https://github.com/yakhu/jinjasql.git@f8c62d1bea97d0320bb3676f4f83bd2357ccfe55 + # via noteable +jmespath==1.0.1 + # via + # boto3 + # botocore +joblib==1.2.0 + # via flytekit +json5==0.9.14 + # via jupyterlab-server +jsonpointer==2.3 + # via 
jsonschema +jsonschema[format-nongpl]==4.17.3 + # via + # jupyter-events + # jupyterlab-server + # nbformat + # scrapbook +jupyter-client==8.2.0 + # via + # ipykernel + # jupyter-server + # nbclassic + # nbclient + # notebook +jupyter-core==5.3.0 + # via + # ipykernel + # jupyter-client + # jupyter-server + # jupyterlab + # nbclassic + # nbclient + # nbconvert + # nbformat + # notebook +jupyter-events==0.6.3 + # via + # jupyter-server + # jupyter-server-fileid +jupyter-lsp==2.1.0 + # via jupyterlab-lsp +jupyter-server==2.5.0 + # via + # jupyter-lsp + # jupyter-server-fileid + # jupyter-server-mathjax + # jupyterlab + # jupyterlab-server + # nbclassic + # nbdime + # notebook-shim +jupyter-server-fileid==0.9.0 + # via jupyter-server-ydoc +jupyter-server-mathjax==0.2.6 + # via nbdime +jupyter-server-terminals==0.4.4 + # via jupyter-server +jupyter-server-ydoc==0.8.0 + # via jupyterlab +jupyter-ydoc==0.2.4 + # via + # jupyter-server-ydoc + # jupyterlab +jupyterlab==3.6.3 + # via + # fugue-jupyter + # jupyterlab-lsp +jupyterlab-lsp==3.10.2 + # via fugue-jupyter +jupyterlab-pygments==0.2.2 + # via nbconvert +jupyterlab-server==2.22.1 + # via jupyterlab +jwt==1.3.1 + # via noteable-origami +keyring==23.13.1 + # via flytekit +kubernetes==26.1.0 + # via flytekit +libcst==0.4.9 + # via google-cloud-bigquery-storage +lxml==4.9.2 + # via redshift-connector +lz4==4.3.2 + # via databricks-sql-connector +mako==1.2.4 + # via alembic +markdown-it-py==2.2.0 + # via rich +markupsafe==2.1.2 + # via + # jinja2 + # mako + # nbconvert +marshmallow==3.19.0 + # via + # dataclasses-json + # marshmallow-enum + # marshmallow-jsonschema +marshmallow-enum==1.5.1 + # via dataclasses-json +marshmallow-jsonschema==0.13.0 + # via flytekit +matplotlib-inline==0.1.6 + # via + # ipykernel + # ipython +mdurl==0.1.2 + # via markdown-it-py +mistune==2.0.5 + # via nbconvert +mmhash3==3.0.1 + # via pyiceberg +more-itertools==9.1.0 + # via jaraco-classes +mypy-extensions==1.0.0 + # via typing-inspect 
+mysqlclient==2.1.1 + # via noteable +natsort==8.3.1 + # via flytekit +nbclassic==1.0.0 + # via + # jupyterlab + # notebook +nbclient==0.7.4 + # via + # nbconvert + # papermill +nbconvert==7.4.0 + # via + # flytekitplugins-papermill + # jupyter-server + # nbclassic + # notebook +nbdime==3.1.1 + # via -r python/noteable/3.10/requirements.in +nbformat==5.8.0 + # via + # jupyter-server + # nbclassic + # nbclient + # nbconvert + # nbdime + # noteable-origami + # notebook + # papermill +nest-asyncio==1.5.6 + # via + # ipykernel + # nbclassic + # notebook +noteable @ git+https://www.github.com/noteable-io/noteable-notebook-magics.git@a9b922af6f0d96050e1a20f38b92c794ce5fc894 + # via -r python/noteable/3.10/requirements.in +noteable-origami==0.0.11 + # via papermill-origami +notebook==6.5.4 + # via + # fugue-jupyter + # jupyterlab +notebook-shim==0.2.3 + # via nbclassic +numpy==1.23.5 + # via + # databricks-sql-connector + # duckdb-engine + # flytekit + # pandas + # pyarrow + # triad +oauthlib==3.2.2 + # via + # databricks-sql-connector + # requests-oauthlib +openpyxl==3.1.2 + # via + # databricks-sql-connector + # noteable +orjson==3.8.12 + # via noteable-origami +oscrypto==1.3.0 + # via snowflake-connector-python +packaging==23.1 + # via + # build + # dagster + # dagstermill + # docker + # google-cloud-bigquery + # google-cloud-bigquery-storage + # ipykernel + # jupyter-server + # jupyterlab + # jupyterlab-server + # marshmallow + # nbconvert + # redshift-connector + # snowflake-connector-python + # sqlalchemy-bigquery + # sqlalchemy-redshift +pandas==1.5.3 + # via + # databricks-sql-connector + # dx + # flytekit + # fugue + # noteable + # qpd + # scrapbook + # triad +pandocfilters==1.5.0 + # via nbconvert +papermill==2.4.0 + # via + # dagstermill + # flytekitplugins-papermill + # papermill-origami + # scrapbook +papermill-origami==0.0.9 + # via -r python/noteable/3.10/requirements.in +parso==0.8.3 + # via jedi +pendulum==2.1.2 + # via dagster +pexpect==4.8.0 + # via 
ipython +pickleshare==0.7.5 + # via ipython +platformdirs==3.5.1 + # via jupyter-core +prettytable==3.7.0 + # via noteable +prometheus-client==0.16.0 + # via + # jupyter-server + # nbclassic + # notebook +prompt-toolkit==3.0.38 + # via ipython +proto-plus==1.22.2 + # via + # google-cloud-bigquery + # google-cloud-bigquery-storage +protobuf==3.20.3 + # via + # dagster + # flyteidl + # flytekit + # google-api-core + # google-cloud-bigquery + # googleapis-common-protos + # grpcio-health-checking + # grpcio-status + # proto-plus + # protoc-gen-swagger +protoc-gen-swagger==0.1.0 + # via flyteidl +psutil==5.9.5 + # via ipykernel +psycopg2==2.9.5 + # via noteable +ptyprocess==0.7.0 + # via + # pexpect + # terminado +pure-eval==0.2.2 + # via stack-data +py==1.11.0 + # via retry +pyarrow[pandas]==8.0.0 + # via + # databricks-sql-connector + # flytekit + # fugue + # noteable + # scrapbook + # sqlalchemy-bigquery + # triad +pyasn1==0.5.0 + # via + # pyasn1-modules + # rsa +pyasn1-modules==0.3.0 + # via google-auth +pyathena[sqlalchemy]==2.25.2 + # via noteable +pycparser==2.21 + # via cffi +pycryptodomex==3.17 + # via snowflake-connector-python +pydantic==1.10.4 + # via + # dx + # noteable + # noteable-origami + # pyiceberg + # sidecar-comms +pygments==2.15.1 + # via + # ipython + # nbconvert + # nbdime + # rich +pyhive==0.6.5 + # via sqlalchemy-databricks +pyiceberg==0.3.0 + # via -r python/noteable/3.10/requirements.in +pyjwt==2.7.0 + # via + # singlestoredb + # snowflake-connector-python +pymysql==1.0.2 + # via noteable +pyodbc==4.0.39 + # via noteable +pyopenssl==23.1.1 + # via + # flytekit + # snowflake-connector-python +pyparsing==3.0.9 + # via pyiceberg +pyproject-hooks==1.0.0 + # via build +pyrsistent==0.19.3 + # via jsonschema +python-dateutil==2.8.2 + # via + # arrow + # botocore + # croniter + # dagster + # flytekit + # google-cloud-bigquery + # jupyter-client + # kubernetes + # pandas + # pendulum + # pyhive +python-json-logger==2.0.7 + # via + # flytekit + # 
jupyter-events +python-slugify==8.0.1 + # via cookiecutter +pytimeparse==1.1.8 + # via flytekit +pytz==2023.3 + # via + # clickhouse-driver + # dagster + # flytekit + # pandas + # redshift-connector + # snowflake-connector-python + # trino +pytzdata==2020.1 + # via pendulum +pyyaml==6.0 + # via + # cookiecutter + # dagster + # flytekit + # jupyter-events + # kubernetes + # libcst + # papermill + # pyiceberg + # responses +pyzmq==25.0.2 + # via + # ipykernel + # jupyter-client + # jupyter-server + # nbclassic + # notebook +qpd==0.4.1 + # via fugue +redshift-connector==2.0.910 + # via noteable +regex==2023.5.5 + # via docker-image-py +requests==2.28.2 + # via + # clickhouse-sqlalchemy + # cookiecutter + # dagster + # databricks-sql-connector + # docker + # flytekit + # google-api-core + # google-cloud-bigquery + # jupyterlab-server + # kubernetes + # nbdime + # papermill + # pyiceberg + # redshift-connector + # requests-oauthlib + # responses + # singlestoredb + # snowflake-connector-python + # trino +requests-oauthlib==1.3.1 + # via kubernetes +responses==0.23.1 + # via flytekit +retry==0.9.2 + # via flytekit +rfc3339-validator==0.1.4 + # via + # jsonschema + # jupyter-events +rfc3986[idna2008]==1.5.0 + # via httpx +rfc3986-validator==0.1.1 + # via + # jsonschema + # jupyter-events +rich==13.2.0 + # via + # noteable + # pyiceberg +rsa==4.9 + # via google-auth +rx==1.6.3 + # via dagster +s3transfer==0.6.1 + # via boto3 +scramp==1.4.4 + # via redshift-connector +scrapbook==0.5.0 + # via dagstermill +secretstorage==3.3.3 + # via keyring +send2trash==1.8.2 + # via + # jupyter-server + # nbclassic + # notebook +sidecar-comms @ git+https://www.github.com/noteable-io/sidecar_comms.git@009210009af85640852398dd6965bb7c285e21e2 + # via -r python/noteable/3.10/requirements.in +singlestoredb==0.6.1 + # via sqlalchemy-singlestoredb +six==1.16.0 + # via + # asttokens + # bleach + # fs + # google-auth + # grpcio + # kubernetes + # python-dateutil + # rfc3339-validator + # thrift + 
# triad +smmap==5.0.0 + # via gitdb +sniffio==1.3.0 + # via + # anyio + # httpcore + # httpx +snowflake-connector-python==3.0.3 + # via snowflake-sqlalchemy +snowflake-sqlalchemy==1.4.7 + # via noteable +sortedcontainers==2.4.0 + # via flytekit +soupsieve==2.4.1 + # via beautifulsoup4 +sqlalchemy==1.4.48 + # via + # alembic + # clickhouse-sqlalchemy + # dagster + # databricks-sql-connector + # duckdb-engine + # noteable + # pyathena + # snowflake-sqlalchemy + # sqlalchemy-bigquery + # sqlalchemy-cockroachdb + # sqlalchemy-databricks + # sqlalchemy-redshift + # sqlalchemy-singlestoredb + # trino +sqlalchemy-bigquery==1.5.0 + # via noteable +sqlalchemy-cockroachdb==1.4.4 + # via noteable +sqlalchemy-databricks==0.2.0 + # via noteable +sqlalchemy-redshift==0.8.12 + # via noteable +sqlalchemy-singlestoredb==0.2.0 + # via noteable +sqlglot==13.2.0 + # via fugue +sqlparams==5.1.0 + # via singlestoredb +sqlparse==0.4.4 + # via noteable +stack-data==0.6.2 + # via ipython +statsd==3.3.0 + # via flytekit +structlog==22.3.0 + # via + # dx + # ipykernel-logging + # noteable + # noteable-origami +tabulate==0.9.0 + # via dagster +tenacity==8.2.2 + # via + # papermill + # pyathena +terminado==0.17.1 + # via + # jupyter-server + # jupyter-server-terminals + # nbclassic + # notebook +text-unidecode==1.3 + # via python-slugify +textwrap3==0.9.2 + # via ansiwrap +thrift==0.16.0 + # via databricks-sql-connector +tinycss2==1.2.1 + # via nbconvert +tomli==2.0.1 + # via + # build + # jupyterlab + # pyproject-hooks +toposort==1.10 + # via dagster +tornado==6.3.2 + # via + # ipykernel + # jupyter-client + # jupyter-server + # jupyterlab + # nbclassic + # nbdime + # notebook + # terminado +tqdm==4.65.0 + # via + # dagster + # noteable + # papermill +traitlets==5.9.0 + # via + # comm + # ipykernel + # ipython + # jupyter-client + # jupyter-core + # jupyter-events + # jupyter-server + # matplotlib-inline + # nbclassic + # nbclient + # nbconvert + # nbformat + # notebook +triad==0.8.7 + # via 
+ # adagio + # fugue + # fugue-sql-antlr + # qpd +trino[sqlalchemy]==0.313.0 + # via noteable +types-pyyaml==6.0.12.9 + # via responses +typing-compat==0.1.0 + # via dagster +typing-extensions==4.5.0 + # via + # alembic + # dagster + # flytekit + # libcst + # pydantic + # snowflake-connector-python + # typing-inspect +typing-inspect==0.8.0 + # via + # dataclasses-json + # libcst +tzlocal==5.0.1 + # via clickhouse-driver +unidiff==0.6.0 + # via noteable +uri-template==1.2.0 + # via jsonschema +urllib3==1.26.15 + # via + # botocore + # docker + # flytekit + # google-auth + # kubernetes + # requests + # responses + # snowflake-connector-python +watchdog==3.0.0 + # via dagster +wcwidth==0.2.6 + # via + # prettytable + # prompt-toolkit +webcolors==1.13 + # via jsonschema +webencodings==0.5.1 + # via + # bleach + # tinycss2 +websocket-client==1.5.1 + # via + # docker + # jupyter-server + # kubernetes +websockets==10.4 + # via noteable-origami +wheel==0.40.0 + # via + # flytekit + # singlestoredb +wrapt==1.15.0 + # via + # deprecated + # flytekit +xlrd==2.0.1 + # via noteable +y-py==0.5.9 + # via + # jupyter-ydoc + # ypy-websocket +ypy-websocket==0.8.2 + # via jupyter-server-ydoc +zipp==3.15.0 + # via importlib-metadata +zstandard==0.19.0 + # via pyiceberg + +# The following packages are considered to be unsafe in a requirements file: +# setuptools diff --git a/python/noteable/3.9/3.9.gpu.requirements.in b/python/noteable/3.9/3.9.gpu.requirements.in new file mode 100755 index 00000000..8ccced15 --- /dev/null +++ b/python/noteable/3.9/3.9.gpu.requirements.in @@ -0,0 +1,2 @@ +-r gpu.requirements.in +# any 3.9-specific requirements for the GPU build should be removed from the *requirements.in file referenced above, and instead added below diff --git a/python/noteable/3.9/3.9.requirements.in b/python/noteable/3.9/3.9.requirements.in new file mode 100755 index 00000000..9ec676dd --- /dev/null +++ b/python/noteable/3.9/3.9.requirements.in @@ -0,0 +1,2 @@ +-r requirements.in +# 
any 3.9-specific requirements should be removed from the *requirements.in file referenced above, and instead added below diff --git a/python/noteable/3.9/Dockerfile b/python/noteable/3.9/Dockerfile new file mode 100755 index 00000000..94ae90f8 --- /dev/null +++ b/python/noteable/3.9/Dockerfile @@ -0,0 +1,44 @@ +# syntax = docker/dockerfile:1.2.1 +# Noteable build: adds packages to enable Noteable-specific functionality: +# - DEX support (via dx) +# - git integration (via gitpython, nbdime) +# - variable explorer, form cells (via sidecar_comms) +ARG BASE_IMAGE +# hadolint ignore=DL3006 +FROM ${BASE_IMAGE} as base + +ARG NBL_PYTHON_VERSION=3.9 + +USER root + +# Prerequisites to install msodbcsql18 driver for MS SQL Server connectivity (curl -f fails the build on an HTTP error instead of piping an error page into gpg). +# From https://github.com/MicrosoftDocs/sql-docs/issues/8821#issuecomment-1569088666 +SHELL ["/bin/bash", "-o", "pipefail", "-c"] +# hadolint ignore=DL3008,DL3009,DL3015 +RUN apt-get update -y && \ + apt-get install gnupg2 lsb-release -y && \ + curl -fsSL https://packages.microsoft.com/keys/microsoft.asc | gpg --dearmor > /usr/share/keyrings/microsoft-archive-keyring.gpg && \ + echo "deb [arch=amd64,armhf,arm64 signed-by=/usr/share/keyrings/microsoft-archive-keyring.gpg] https://packages.microsoft.com/debian/$(lsb_release -rs)/prod bullseye main" > /etc/apt/sources.list.d/mssql-release.list + +# hadolint ignore=DL3045 +COPY Aptfile . +RUN /usr/bin/apt-install Aptfile + +COPY requirements.txt /tmp/noteable_requirements.txt +RUN pip install --no-cache-dir -r /tmp/noteable_requirements.txt + +# Smoke test to ensure packages were installed properly +# hadolint ignore=DL3059 +RUN python -c "import dx, noteable, psutil, sidecar_comms" + +COPY .pythonrc /srv/noteable/. 
+COPY ipython_config.py /etc/ipython/ +COPY git_credential_helper.py /git_credential_helper.py +COPY git-wrapper.sh /usr/local/bin/git + +USER noteable + +FROM base as gpu + +COPY gpu.requirements.txt /tmp/noteable_gpu_requirements.txt +RUN pip install --no-cache-dir -r /tmp/noteable_gpu_requirements.txt diff --git a/python/noteable/3.9/gpu.requirements.txt b/python/noteable/3.9/gpu.requirements.txt new file mode 100644 index 00000000..ccea0619 --- /dev/null +++ b/python/noteable/3.9/gpu.requirements.txt @@ -0,0 +1,6 @@ +# +# This file is autogenerated by pip-compile with Python 3.9 +# by the following command: +# +# pip-compile --output-file=python/noteable/3.9/gpu.requirements.txt --resolver=backtracking python/noteable/3.9/3.9.gpu.requirements.in +# diff --git a/python/noteable/3.9/requirements.txt b/python/noteable/3.9/requirements.txt new file mode 100644 index 00000000..7b720faf --- /dev/null +++ b/python/noteable/3.9/requirements.txt @@ -0,0 +1,1001 @@ +# +# This file is autogenerated by pip-compile with Python 3.9 +# by the following command: +# +# pip-compile --output-file=python/noteable/3.9/requirements.txt --resolver=backtracking python/noteable/3.9/3.9.requirements.in +# +adagio==0.2.4 + # via + # fugue + # qpd +aiofiles==22.1.0 + # via ypy-websocket +aiosqlite==0.19.0 + # via ypy-websocket +alembic==1.11.1 + # via + # dagster + # databricks-sql-connector +ansiwrap==0.8.4 + # via papermill +antlr4-python3-runtime==4.11.1 + # via + # fugue-sql-antlr + # qpd +anyio==3.6.2 + # via + # httpcore + # jupyter-server +appdirs==1.4.4 + # via fs +argon2-cffi==21.3.0 + # via + # jupyter-server + # nbclassic + # notebook +argon2-cffi-bindings==21.2.0 + # via argon2-cffi +arrow==1.2.3 + # via + # isoduration + # jinja2-time +asn1crypto==1.5.1 + # via + # oscrypto + # scramp + # snowflake-connector-python +asttokens==2.2.1 + # via stack-data +attrs==23.1.0 + # via jsonschema +babel==2.12.1 + # via jupyterlab-server +backcall==0.2.0 + # via ipython +backoff==2.2.1 + # 
via + # noteable + # noteable-origami +beautifulsoup4==4.12.2 + # via + # nbconvert + # redshift-connector +binaryornot==0.4.4 + # via cookiecutter +bitmath==1.3.3.1 + # via noteable-origami +bleach==6.0.0 + # via nbconvert +boto3==1.26.136 + # via + # pyathena + # redshift-connector +botocore==1.29.136 + # via + # boto3 + # pyathena + # redshift-connector + # s3transfer +build==0.10.0 + # via singlestoredb +cachetools==5.3.0 + # via google-auth +certifi==2023.5.7 + # via + # httpcore + # httpx + # kubernetes + # requests + # snowflake-connector-python +cffi==1.15.1 + # via + # argon2-cffi-bindings + # cryptography + # snowflake-connector-python +chardet==5.1.0 + # via binaryornot +charset-normalizer==2.1.1 + # via + # requests + # snowflake-connector-python +click==8.1.3 + # via + # cookiecutter + # dagster + # flytekit + # noteable + # papermill + # pyiceberg +clickhouse-driver==0.2.6 + # via clickhouse-sqlalchemy +clickhouse-sqlalchemy==0.2.3 + # via noteable +cloudpickle==2.2.0 + # via + # -r python/noteable/3.9/requirements.in + # flytekit + # papermill-origami +colorama==0.4.6 + # via nbdime +coloredlogs==14.0 + # via dagster +comm==0.1.3 + # via ipykernel +cookiecutter==2.1.1 + # via flytekit +croniter==1.3.14 + # via + # dagster + # flytekit +cryptography==40.0.2 + # via + # jwt + # pyopenssl + # secretstorage + # snowflake-connector-python +dagster==1.0.15 + # via dagstermill +dagstermill==0.16.15 + # via -r python/noteable/3.9/requirements.in +databricks-sql-connector==2.5.2 + # via sqlalchemy-databricks +dataclasses-json==0.5.7 + # via flytekit +debugpy==1.6.7 + # via ipykernel +decorator==5.1.1 + # via + # ipython + # retry +defusedxml==0.7.1 + # via nbconvert +deprecated==1.2.13 + # via flytekit +diskcache==5.6.1 + # via flytekit +docker==6.1.2 + # via flytekit +docker-image-py==0.1.12 + # via flytekit +docstring-parser==0.15 + # via + # dagster + # flytekit +duckdb==0.8.0 + # via duckdb-engine +duckdb-engine==0.6.9 + # via + # dx + # noteable +dx @ 
git+https://www.github.com/noteable-io/dx.git@a9a4310e51236833e818f3d81edc06a2c2fa3b0e + # via -r python/noteable/3.9/requirements.in +entrypoints==0.4 + # via papermill +et-xmlfile==1.1.0 + # via openpyxl +exceptiongroup==1.1.1 + # via dx +executing==1.2.0 + # via stack-data +fastjsonschema==2.16.3 + # via nbformat +filelock==3.12.0 + # via snowflake-connector-python +flyteidl==1.2.10 + # via flytekit +flytekit==1.2.11 + # via flytekitplugins-papermill +flytekitplugins-papermill==1.2.4 + # via -r python/noteable/3.9/requirements.in +fqdn==1.5.1 + # via jsonschema +fs==2.4.16 + # via triad +fsspec==2023.1.0 + # via + # pyathena + # pyiceberg +fugue==0.8.3 + # via + # -r python/noteable/3.9/requirements.in + # fugue-jupyter +fugue-jupyter==0.2.3 + # via -r python/noteable/3.9/requirements.in +fugue-sql-antlr==0.1.6 + # via fugue +future==0.18.3 + # via + # pyhive + # sqlalchemy-bigquery +gitdb==4.0.10 + # via gitpython +gitpython==3.1.31 + # via + # flytekit + # nbdime + # noteable +google-api-core[grpc]==2.11.0 + # via + # google-cloud-bigquery + # google-cloud-bigquery-storage + # google-cloud-core + # sqlalchemy-bigquery +google-auth==2.18.1 + # via + # google-api-core + # google-cloud-core + # kubernetes + # sqlalchemy-bigquery +google-cloud-bigquery==3.10.0 + # via sqlalchemy-bigquery +google-cloud-bigquery-storage==2.6.3 + # via + # noteable + # sqlalchemy-bigquery +google-cloud-core==2.3.2 + # via google-cloud-bigquery +google-crc32c==1.5.0 + # via google-resumable-media +google-resumable-media==2.5.0 + # via google-cloud-bigquery +googleapis-common-protos==1.59.0 + # via + # flyteidl + # google-api-core + # grpcio-status +graphviz==0.20.1 + # via -r python/noteable/3.9/requirements.in +greenlet==1.1.3.post0 + # via + # noteable + # sqlalchemy +grpcio==1.47.5 + # via + # dagster + # flytekit + # google-api-core + # google-cloud-bigquery + # grpcio-health-checking + # grpcio-status +grpcio-health-checking==1.43.0 + # via dagster +grpcio-status==1.47.5 + # via 
+ # flytekit + # google-api-core +h11==0.14.0 + # via httpcore +h2==4.1.0 + # via httpx +hpack==4.0.0 + # via h2 +httpcore==0.16.3 + # via httpx +httpx[http2]==0.23.3 + # via + # noteable + # noteable-origami +humanfriendly==10.0 + # via coloredlogs +hyperframe==6.0.1 + # via h2 +idna==3.4 + # via + # anyio + # jsonschema + # requests + # rfc3986 + # snowflake-connector-python +importlib-metadata==6.6.0 + # via + # flytekit + # jupyter-client + # jupyter-lsp + # jupyter-ydoc + # jupyterlab-server + # keyring + # nbconvert +ipykernel==6.23.1 + # via + # dagstermill + # flytekitplugins-papermill + # ipython + # nbclassic + # notebook + # sidecar-comms +ipykernel-logging @ git+https://www.github.com/noteable-io/ipykernel-logging.git@b5ae61c0c99ba8e1a6740fd24b123bc580efe53a + # via -r python/noteable/3.9/requirements.in +ipython[kernel]==8.13.2 + # via + # dx + # fugue-jupyter + # ipykernel + # jupyterlab + # noteable + # scrapbook +ipython-genutils==0.2.0 + # via + # dagstermill + # nbclassic + # notebook +isoduration==20.11.0 + # via jsonschema +jaraco-classes==3.2.3 + # via keyring +jedi==0.18.2 + # via ipython +jeepney==0.8.0 + # via + # keyring + # secretstorage +jinja2==3.1.2 + # via + # cookiecutter + # dagster + # fugue + # fugue-sql-antlr + # jinja2-time + # jinjasql + # jupyter-server + # jupyterlab + # jupyterlab-server + # nbclassic + # nbconvert + # nbdime + # notebook +jinja2-time==0.2.0 + # via cookiecutter +jinjasql @ git+https://github.com/yakhu/jinjasql.git@f8c62d1bea97d0320bb3676f4f83bd2357ccfe55 + # via noteable +jmespath==1.0.1 + # via + # boto3 + # botocore +joblib==1.2.0 + # via flytekit +json5==0.9.14 + # via jupyterlab-server +jsonpointer==2.3 + # via jsonschema +jsonschema[format-nongpl]==4.17.3 + # via + # jupyter-events + # jupyterlab-server + # nbformat + # scrapbook +jupyter-client==8.2.0 + # via + # ipykernel + # jupyter-server + # nbclassic + # nbclient + # notebook +jupyter-core==5.3.0 + # via + # ipykernel + # jupyter-client + # 
jupyter-server + # jupyterlab + # nbclassic + # nbclient + # nbconvert + # nbformat + # notebook +jupyter-events==0.6.3 + # via + # jupyter-server + # jupyter-server-fileid +jupyter-lsp==2.1.0 + # via jupyterlab-lsp +jupyter-server==2.5.0 + # via + # jupyter-lsp + # jupyter-server-fileid + # jupyter-server-mathjax + # jupyterlab + # jupyterlab-server + # nbclassic + # nbdime + # notebook-shim +jupyter-server-fileid==0.9.0 + # via jupyter-server-ydoc +jupyter-server-mathjax==0.2.6 + # via nbdime +jupyter-server-terminals==0.4.4 + # via jupyter-server +jupyter-server-ydoc==0.8.0 + # via jupyterlab +jupyter-ydoc==0.2.4 + # via + # jupyter-server-ydoc + # jupyterlab +jupyterlab==3.6.3 + # via + # fugue-jupyter + # jupyterlab-lsp +jupyterlab-lsp==3.10.2 + # via fugue-jupyter +jupyterlab-pygments==0.2.2 + # via nbconvert +jupyterlab-server==2.22.1 + # via jupyterlab +jwt==1.3.1 + # via noteable-origami +keyring==23.13.1 + # via flytekit +kubernetes==26.1.0 + # via flytekit +libcst==0.4.9 + # via google-cloud-bigquery-storage +lxml==4.9.2 + # via redshift-connector +lz4==4.3.2 + # via databricks-sql-connector +mako==1.2.4 + # via alembic +markdown-it-py==2.2.0 + # via rich +markupsafe==2.1.2 + # via + # jinja2 + # mako + # nbconvert +marshmallow==3.19.0 + # via + # dataclasses-json + # marshmallow-enum + # marshmallow-jsonschema +marshmallow-enum==1.5.1 + # via dataclasses-json +marshmallow-jsonschema==0.13.0 + # via flytekit +matplotlib-inline==0.1.6 + # via + # ipykernel + # ipython +mdurl==0.1.2 + # via markdown-it-py +mistune==2.0.5 + # via nbconvert +mmhash3==3.0.1 + # via pyiceberg +more-itertools==9.1.0 + # via jaraco-classes +mypy-extensions==1.0.0 + # via typing-inspect +mysqlclient==2.1.1 + # via noteable +natsort==8.3.1 + # via flytekit +nbclassic==1.0.0 + # via + # jupyterlab + # notebook +nbclient==0.7.4 + # via + # nbconvert + # papermill +nbconvert==7.4.0 + # via + # flytekitplugins-papermill + # jupyter-server + # nbclassic + # notebook +nbdime==3.1.1 + # 
via -r python/noteable/3.9/requirements.in +nbformat==5.8.0 + # via + # jupyter-server + # nbclassic + # nbclient + # nbconvert + # nbdime + # noteable-origami + # notebook + # papermill +nest-asyncio==1.5.6 + # via + # ipykernel + # nbclassic + # notebook +noteable @ git+https://www.github.com/noteable-io/noteable-notebook-magics.git@a9b922af6f0d96050e1a20f38b92c794ce5fc894 + # via -r python/noteable/3.9/requirements.in +noteable-origami==0.0.11 + # via papermill-origami +notebook==6.5.4 + # via + # fugue-jupyter + # jupyterlab +notebook-shim==0.2.3 + # via nbclassic +numpy==1.23.5 + # via + # databricks-sql-connector + # duckdb-engine + # flytekit + # pandas + # pyarrow + # triad +oauthlib==3.2.2 + # via + # databricks-sql-connector + # requests-oauthlib +openpyxl==3.1.2 + # via + # databricks-sql-connector + # noteable +orjson==3.8.12 + # via noteable-origami +oscrypto==1.3.0 + # via snowflake-connector-python +packaging==23.1 + # via + # build + # dagster + # dagstermill + # docker + # google-cloud-bigquery + # google-cloud-bigquery-storage + # ipykernel + # jupyter-server + # jupyterlab + # jupyterlab-server + # marshmallow + # nbconvert + # redshift-connector + # snowflake-connector-python + # sqlalchemy-bigquery + # sqlalchemy-redshift +pandas==1.5.3 + # via + # databricks-sql-connector + # dx + # flytekit + # fugue + # noteable + # qpd + # scrapbook + # triad +pandocfilters==1.5.0 + # via nbconvert +papermill==2.4.0 + # via + # dagstermill + # flytekitplugins-papermill + # papermill-origami + # scrapbook +papermill-origami==0.0.9 + # via -r python/noteable/3.9/requirements.in +parso==0.8.3 + # via jedi +pendulum==2.1.2 + # via dagster +pexpect==4.8.0 + # via ipython +pickleshare==0.7.5 + # via ipython +platformdirs==3.5.1 + # via jupyter-core +prettytable==3.7.0 + # via noteable +prometheus-client==0.16.0 + # via + # jupyter-server + # nbclassic + # notebook +prompt-toolkit==3.0.38 + # via ipython +proto-plus==1.22.2 + # via + # google-cloud-bigquery + # 
google-cloud-bigquery-storage +protobuf==3.20.3 + # via + # dagster + # flyteidl + # flytekit + # google-api-core + # google-cloud-bigquery + # googleapis-common-protos + # grpcio-health-checking + # grpcio-status + # proto-plus + # protoc-gen-swagger +protoc-gen-swagger==0.1.0 + # via flyteidl +psutil==5.9.5 + # via ipykernel +psycopg2==2.9.5 + # via noteable +ptyprocess==0.7.0 + # via + # pexpect + # terminado +pure-eval==0.2.2 + # via stack-data +py==1.11.0 + # via retry +pyarrow[pandas]==8.0.0 + # via + # databricks-sql-connector + # flytekit + # fugue + # noteable + # scrapbook + # sqlalchemy-bigquery + # triad +pyasn1==0.5.0 + # via + # pyasn1-modules + # rsa +pyasn1-modules==0.3.0 + # via google-auth +pyathena[sqlalchemy]==2.25.2 + # via noteable +pycparser==2.21 + # via cffi +pycryptodomex==3.17 + # via snowflake-connector-python +pydantic==1.10.4 + # via + # dx + # noteable + # noteable-origami + # pyiceberg + # sidecar-comms +pygments==2.15.1 + # via + # ipython + # nbconvert + # nbdime + # rich +pyhive==0.6.5 + # via sqlalchemy-databricks +pyiceberg==0.3.0 + # via -r python/noteable/3.9/requirements.in +pyjwt==2.7.0 + # via + # singlestoredb + # snowflake-connector-python +pymysql==1.0.2 + # via noteable +pyodbc==4.0.39 + # via noteable +pyopenssl==23.1.1 + # via + # flytekit + # snowflake-connector-python +pyparsing==3.0.9 + # via pyiceberg +pyproject-hooks==1.0.0 + # via build +pyrsistent==0.19.3 + # via jsonschema +python-dateutil==2.8.2 + # via + # arrow + # botocore + # croniter + # dagster + # flytekit + # google-cloud-bigquery + # jupyter-client + # kubernetes + # pandas + # pendulum + # pyhive +python-json-logger==2.0.7 + # via + # flytekit + # jupyter-events +python-slugify==8.0.1 + # via cookiecutter +pytimeparse==1.1.8 + # via flytekit +pytz==2023.3 + # via + # clickhouse-driver + # dagster + # flytekit + # pandas + # redshift-connector + # snowflake-connector-python + # trino +pytzdata==2020.1 + # via pendulum +pyyaml==6.0 + # via + # 
cookiecutter + # dagster + # flytekit + # jupyter-events + # kubernetes + # libcst + # papermill + # pyiceberg + # responses +pyzmq==25.0.2 + # via + # ipykernel + # jupyter-client + # jupyter-server + # nbclassic + # notebook +qpd==0.4.1 + # via fugue +redshift-connector==2.0.910 + # via noteable +regex==2023.5.5 + # via docker-image-py +requests==2.28.2 + # via + # clickhouse-sqlalchemy + # cookiecutter + # dagster + # databricks-sql-connector + # docker + # flytekit + # google-api-core + # google-cloud-bigquery + # jupyterlab-server + # kubernetes + # nbdime + # papermill + # pyiceberg + # redshift-connector + # requests-oauthlib + # responses + # singlestoredb + # snowflake-connector-python + # trino +requests-oauthlib==1.3.1 + # via kubernetes +responses==0.23.1 + # via flytekit +retry==0.9.2 + # via flytekit +rfc3339-validator==0.1.4 + # via + # jsonschema + # jupyter-events +rfc3986[idna2008]==1.5.0 + # via httpx +rfc3986-validator==0.1.1 + # via + # jsonschema + # jupyter-events +rich==13.2.0 + # via + # noteable + # pyiceberg +rsa==4.9 + # via google-auth +rx==1.6.3 + # via dagster +s3transfer==0.6.1 + # via boto3 +scramp==1.4.4 + # via redshift-connector +scrapbook==0.5.0 + # via dagstermill +secretstorage==3.3.3 + # via keyring +send2trash==1.8.2 + # via + # jupyter-server + # nbclassic + # notebook +sidecar-comms @ git+https://www.github.com/noteable-io/sidecar_comms.git@009210009af85640852398dd6965bb7c285e21e2 + # via -r python/noteable/3.9/requirements.in +singlestoredb==0.6.1 + # via sqlalchemy-singlestoredb +six==1.16.0 + # via + # asttokens + # bleach + # fs + # google-auth + # grpcio + # kubernetes + # python-dateutil + # rfc3339-validator + # thrift + # triad +smmap==5.0.0 + # via gitdb +sniffio==1.3.0 + # via + # anyio + # httpcore + # httpx +snowflake-connector-python==3.0.3 + # via snowflake-sqlalchemy +snowflake-sqlalchemy==1.4.7 + # via noteable +sortedcontainers==2.4.0 + # via flytekit +soupsieve==2.4.1 + # via beautifulsoup4 
+sqlalchemy==1.4.48 + # via + # alembic + # clickhouse-sqlalchemy + # dagster + # databricks-sql-connector + # duckdb-engine + # noteable + # pyathena + # snowflake-sqlalchemy + # sqlalchemy-bigquery + # sqlalchemy-cockroachdb + # sqlalchemy-databricks + # sqlalchemy-redshift + # sqlalchemy-singlestoredb + # trino +sqlalchemy-bigquery==1.5.0 + # via noteable +sqlalchemy-cockroachdb==1.4.4 + # via noteable +sqlalchemy-databricks==0.2.0 + # via noteable +sqlalchemy-redshift==0.8.12 + # via noteable +sqlalchemy-singlestoredb==0.2.0 + # via noteable +sqlglot==13.2.0 + # via fugue +sqlparams==5.1.0 + # via singlestoredb +sqlparse==0.4.4 + # via noteable +stack-data==0.6.2 + # via ipython +statsd==3.3.0 + # via flytekit +structlog==22.3.0 + # via + # dx + # ipykernel-logging + # noteable + # noteable-origami +tabulate==0.9.0 + # via dagster +tenacity==8.2.2 + # via + # papermill + # pyathena +terminado==0.17.1 + # via + # jupyter-server + # jupyter-server-terminals + # nbclassic + # notebook +text-unidecode==1.3 + # via python-slugify +textwrap3==0.9.2 + # via ansiwrap +thrift==0.16.0 + # via databricks-sql-connector +tinycss2==1.2.1 + # via nbconvert +tomli==2.0.1 + # via + # build + # jupyterlab + # pyproject-hooks +toposort==1.10 + # via dagster +tornado==6.3.2 + # via + # ipykernel + # jupyter-client + # jupyter-server + # jupyterlab + # nbclassic + # nbdime + # notebook + # terminado +tqdm==4.65.0 + # via + # dagster + # noteable + # papermill +traitlets==5.9.0 + # via + # comm + # ipykernel + # ipython + # jupyter-client + # jupyter-core + # jupyter-events + # jupyter-server + # matplotlib-inline + # nbclassic + # nbclient + # nbconvert + # nbformat + # notebook +triad==0.8.7 + # via + # adagio + # fugue + # fugue-sql-antlr + # qpd +trino[sqlalchemy]==0.313.0 + # via noteable +types-pyyaml==6.0.12.9 + # via responses +typing-compat==0.1.0 + # via dagster +typing-extensions==4.5.0 + # via + # alembic + # dagster + # flytekit + # ipython + # libcst + # pydantic + # 
snowflake-connector-python + # typing-inspect +typing-inspect==0.8.0 + # via + # dataclasses-json + # libcst +tzlocal==5.0.1 + # via clickhouse-driver +unidiff==0.6.0 + # via noteable +uri-template==1.2.0 + # via jsonschema +urllib3==1.26.15 + # via + # botocore + # docker + # flytekit + # google-auth + # kubernetes + # requests + # responses + # snowflake-connector-python +watchdog==3.0.0 + # via dagster +wcwidth==0.2.6 + # via + # prettytable + # prompt-toolkit +webcolors==1.13 + # via jsonschema +webencodings==0.5.1 + # via + # bleach + # tinycss2 +websocket-client==1.5.1 + # via + # docker + # jupyter-server + # kubernetes +websockets==10.4 + # via noteable-origami +wheel==0.40.0 + # via + # flytekit + # singlestoredb +wrapt==1.15.0 + # via + # deprecated + # flytekit +xlrd==2.0.1 + # via noteable +y-py==0.5.9 + # via + # jupyter-ydoc + # ypy-websocket +ypy-websocket==0.8.2 + # via jupyter-server-ydoc +zipp==3.15.0 + # via importlib-metadata +zstandard==0.19.0 + # via pyiceberg + +# The following packages are considered to be unsafe in a requirements file: +# setuptools diff --git a/python/noteable/Aptfile b/python/noteable/Aptfile new file mode 100644 index 00000000..8009c1c8 --- /dev/null +++ b/python/noteable/Aptfile @@ -0,0 +1,6 @@ +default-libmysqlclient-dev +libpq-dev +lsb-release +msodbcsql18 +fonts-noto-cjk +graphviz diff --git a/git-wrapper.sh b/python/noteable/git-wrapper.sh old mode 100644 new mode 100755 similarity index 100% rename from git-wrapper.sh rename to python/noteable/git-wrapper.sh diff --git a/git_credential_helper.py b/python/noteable/git_credential_helper.py old mode 100644 new mode 100755 similarity index 100% rename from git_credential_helper.py rename to python/noteable/git_credential_helper.py diff --git a/python/noteable/gpu.requirements.in b/python/noteable/gpu.requirements.in new file mode 100755 index 00000000..e69de29b diff --git a/python/noteable/ipython_config.py b/python/noteable/ipython_config.py new file mode 100755 index 
00000000..e14bc079 --- /dev/null +++ b/python/noteable/ipython_config.py @@ -0,0 +1,36 @@ +from ipykernel_logging.main import configure_log_formatter + +c.InteractiveShellApp.extensions = [ + "noteable", +] + +c.SqlMagic.feedback = False +c.SqlMagic.autopandas = True +c.NTBLMagic.project_dir = "/etc/noteable/project" +c.NoteableDataLoaderMagic.return_head = False +c.IPythonKernel._execute_sleep = 0.15 +# 10 minutes to support large files +c.NTBLMagic.planar_ally_default_timeout_seconds = 600 + +c.Application.logging_config = { + "version": 1, + "formatters": { + "structlog": { + "()": configure_log_formatter, + }, + }, + "handlers": { + "default": { + "class": "logging.StreamHandler", + "formatter": "structlog", + # if this is changed to stdout, we'll see logs in cell outputs + "stream": "ext://sys.stderr", + } + }, + "loggers": { + "IPKernelApp": { + "handlers": ["default"], + "level": "DEBUG", + }, + }, +} diff --git a/python/noteable/requirements.in b/python/noteable/requirements.in new file mode 100755 index 00000000..968979f1 --- /dev/null +++ b/python/noteable/requirements.in @@ -0,0 +1,21 @@ +# Scheduler/orchestration packages +dagstermill==0.16.15 +papermill-origami==0.0.9 +cloudpickle==2.2.0 +flytekitplugins-papermill==1.2.4 +# Added for ease of use with integration partners +pyiceberg==0.3.0 +fugue==0.8.3 +fugue-jupyter==0.2.3 +# Diffing for notebooks +nbdime==3.1.1 +# Making graphviz visualizations/diagrams +graphviz==0.20.1 + +# https://github.com/noteable-io/ packages +git+https://www.github.com/noteable-io/dx.git@a9a4310e51236833e818f3d81edc06a2c2fa3b0e +git+https://www.github.com/noteable-io/noteable-notebook-magics.git@a9b922af6f0d96050e1a20f38b92c794ce5fc894 +git+https://www.github.com/noteable-io/sidecar_comms.git@009210009af85640852398dd6965bb7c285e21e2 +git+https://www.github.com/noteable-io/ipykernel-logging.git@b5ae61c0c99ba8e1a6740fd24b123bc580efe53a + +# (All of the datasources modules are now explicit requirements within
noteable-notebook-magics.) diff --git a/python/run.sh b/python/run.sh new file mode 100755 index 00000000..d93d0a9d --- /dev/null +++ b/python/run.sh @@ -0,0 +1,26 @@ +#!/usr/bin/env bash +set -o pipefail +set -o nounset +set -o errexit + +echo "Local time: $(date)" + +set -x + +connection_file=/tmp/connection_file.json +cp /etc/noteable/connections/connection_file.json ${connection_file} + +# Inject Secrets into environment (see script docstring for more info) +# set +x to avoid echoing the Secrets in plaintext to logs +set +x +echo "Injecting Secrets into environment, echoing is turned off" +# shellcheck disable=SC1091 +source /tmp/secrets_helper.sh +echo "Done injecting Secrets, turning echoing back on" +set -x + +echo "Starting Python kernel" +# https://docs.python.org/3/using/cmdline.html#envvar-PYTHONSTARTUP +export PYTHONSTARTUP=~/.pythonrc + +exec python -m ipykernel_launcher -f ${connection_file} --debug diff --git a/requirements.txt b/requirements.txt deleted file mode 100644 index 110ef814..00000000 --- a/requirements.txt +++ /dev/null @@ -1,12 +0,0 @@ -# Scheduler/ochestration packages -dagstermill==0.16.15 -papermill-origami==0.0.9 -cloudpickle==2.2.0 -flytekitplugins-papermill==1.2.4 - -# https://github.com/noteable-io/ packages -git+https://www.github.com/noteable-io/dx.git@a7df2821182293546d7d7a9ede3cdcc0c946d570 -git+https://www.github.com/noteable-io/noteable-notebook-magics.git@a6a6801da8ea7ccb72b9f354d4780699c3d99f73 -git+https://www.github.com/noteable-io/sidecar_comms.git@6ee04efe60b855c465727f120f8f50a7bfa60097 - -# (All of the datasources modules are now explicit requirements within noteable-notebook-magics.) 
\ No newline at end of file diff --git a/scripts/apt-install b/scripts/apt-install new file mode 100755 index 00000000..adf63f20 --- /dev/null +++ b/scripts/apt-install @@ -0,0 +1,16 @@ +#!/usr/bin/env bash +set -o errexit +set -o nounset +set -o pipefail + +APTFILE=${1} + +apt-get update -y +# shellcheck disable=SC2013,SC2086 +for package in $(cat ${APTFILE} | grep -v -s -e '^#' | grep -v -s -e "^:repo":); do + # Accept End User License Agreement if needed + # (needed for some packages like msodbcsql18) + ACCEPT_EULA=Y apt-get install -y "${package}" +done +apt-get clean +rm -rf /var/lib/apt/lists/* diff --git a/scripts/secrets_helper.sh b/scripts/secrets_helper.sh new file mode 100644 index 00000000..15813008 --- /dev/null +++ b/scripts/secrets_helper.sh @@ -0,0 +1,27 @@ +#!/bin/bash + +# This script helps inject Secrets into the Kernel environment. +# The Vault Agent will volume mount files into the Kernel container +# at /vault/secrets. Noteable Secrets will be in .env suffix files. +# We want to parse all those files and export them as environment variables +# in the bash script that kicks off the Kernel (ipykernel_launcher etc). + +secrets_directory=${VAULT_SECRETS_PATH:-/vault/secrets} + +shopt -s nullglob + +if [ -d "$secrets_directory" ]; then + # shellcheck disable=SC2231 + for file in $secrets_directory/*.env; do + if [ -f "$file" ]; then + name=$(basename "$file" .env) + name=${name^^} # Convert to uppercase + if ! declare -p "$name" > /dev/null 2>&1; then # If variable is not set + content=$(cat "$file") + export "$name=$content" + fi + fi + done +fi + +shopt -u nullglob diff --git a/secrets_helper.py b/secrets_helper.py deleted file mode 100644 index a832841a..00000000 --- a/secrets_helper.py +++ /dev/null @@ -1,30 +0,0 @@ -""" -This script helps inject Secrets into the Kernel environment. -The Vault Agent will volume mount files into the Kernel container -at /vault/secrets. Noteable Secrets will be in .env suffix files. 
-We want to parse all those files and export them as environment variables -in the bash script that kicks off the Kernel (ipykernel_launcher etc). -Doing that scripting in bash is a pain, so we do it in Python here and -bash just does an `eval` on the output. -Some defensive programming to highlight: - - Env vars in the output are all uppercased - - If an env var is already set, we don't overwrite it - - We use shlex to quote the output so bash eval does not cause nasty side effects -""" -import os -import pathlib -import shlex - -output = [] - -secrets_directory = os.environ.get("VAULT_SECRETS_PATH", "/vault/secrets") - -directory = pathlib.Path(secrets_directory) -if directory.exists(): - for file in directory.glob("*.env"): - name = file.stem.upper() - if name not in os.environ: - content = file.read_text() - output.append(f"export {name}={shlex.quote(content)}") - -print("\n".join(output)) \ No newline at end of file