Initialize project with base kernel (#2)

* Add initial builds for python kernel * Add python version to Dockerfile * Fix pull requests * Fix workflow reference * Fix workflow reference again * Prefix with ./ * Fix dockerfile / context args * Add Dockerhub secrets * Add actions:write * Set python version as environment variable * Use build args to set python version * Disable ARM build * Use common files layout from legacy project * Fix makefile script * Add kernels/ prefix to copy paths * Fix file paths * Fix ipython path * Move environment.txt to kernel * Reenable arm builds * Remove julia fix-permissions * Disable arm builds again * Remove default build labels * Remove unused Make scripts * Remove duplicate linting block * Add missing newline * Add comment regarding docker image versions
noteable-io · Feb 10, 2023 · 270355d · 270355d
1 parent f27c3dd
commit 270355d
Show file tree

Hide file tree

Showing 12 changed files with 510 additions and 0 deletions.
diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
@@ -0,0 +1,57 @@
+name: Build kernel images
+
+# Run for pushes to main and for pull requests that target main.
+# NOTE(review): no `types:` are listed for pull_request. The reusable build
+# workflow has a `clear_cache` job that only runs when the pull request state
+# is `closed`; confirm that a close event can actually trigger this workflow.
+on:
+  push:
+    branches: [main]
+  pull_request:
+    branches: [main]
+
+jobs:
+  # Lints the repository's Dockerfiles, shell scripts and YAML files.
+  base-linting:
+    name: base-linting
+    runs-on: ubuntu-22.04
+
+    steps:
+      - name: Checkout Code
+        uses: actions/checkout@v3
+
+      - name: Lint Dockerfile, Shell scripts, YAML
+        uses: github/super-linter@v4
+        env:
+          # The only branch this workflow's triggers reference is `main`,
+          # so the linter's default branch has to be `main` as well.
+          DEFAULT_BRANCH: main
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+
+          # Linters to enable. Quoted because environment values are strings.
+          VALIDATE_BASH: "true"
+          VALIDATE_BASH_EXEC: "true"
+          VALIDATE_DOCKERFILE_HADOLINT: "true"
+          VALIDATE_YAML: "true"
+
+  # Calls the reusable Docker build workflow once per Python version in the
+  # matrix, passing the version through as the PYTHON_VERSION build argument.
+  build_python_kernel:
+    strategy:
+      matrix:
+        # Must be a supported version by jupyter/datascience-notebook
+        # https://hub.docker.com/r/jupyter/datascience-notebook/tags?page=1&name=python-
+        version:
+          - "3.9.13"
+          - "3.8.13"
+    permissions:
+      id-token: write
+      contents: read
+      packages: write
+      actions: write
+    uses: ./.github/workflows/reusable-docker-build.yml
+    secrets: inherit
+    with:
+      dockerfile: ./kernels/python/Dockerfile
+      context: ./kernels/python
+      platforms: "linux/amd64"
+      build_args: |
+        PYTHON_VERSION=${{ matrix.version }}
+      images: |
+        ghcr.io/${{ github.repository }}/python
+      # NOTE(review): every tag uses the Python version as its prefix and no
+      # separator is given, so tags presumably look like `3.9.13main` or
+      # `3.9.13latest` - confirm that this is the intended naming.
+      tags: |
+        type=ref,event=branch,prefix=${{ matrix.version }}
+        type=ref,event=pr,prefix=${{ matrix.version }}
+        type=sha,format=long,prefix=${{ matrix.version }}
+        type=raw,value=latest,enable=${{ github.ref == format('refs/heads/{0}', 'main') }},prefix=${{ matrix.version }}
diff --git a/.github/workflows/reusable-docker-build.yml b/.github/workflows/reusable-docker-build.yml
@@ -0,0 +1,142 @@
+name: docker
+
+# Reusable workflow interface: callers describe the image to build through
+# the inputs below and must provide Docker Hub credentials as secrets.
+on:
+  workflow_call:
+    inputs:
+      dockerfile:
+        description: "Path to the Dockerfile to build"
+        type: string
+        required: false
+        default: Dockerfile
+      context:
+        description: "The context for Docker build"
+        type: string
+        required: false
+        default: "."
+      platforms:
+        description: "Comma-separated list of platforms to build on"
+        type: string
+        required: false
+        default: "linux/amd64,linux/arm64"
+      images:
+        description: "The image names that we want to build"
+        type: string
+        required: true
+      tags:
+        description: "The various tags to be attached to the built image"
+        type: string
+        required: false
+        default: ""
+      labels:
+        description: "The various labels to attach to the built image"
+        type: string
+        required: false
+        default: |
+          org.opencontainers.image.url=https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}
+          org.opencontainers.image.vendor=Noteable
+          org.opencontainers.image.version=${{ github.ref }}
+      target:
+        description: "Sets the target stage to build"
+        type: string
+        required: false
+      build_args:
+        description: "Additional build args to pass to the Docker build"
+        type: string
+        required: false
+        default: ""
+    secrets:
+      # We login to Dockerhub to prevent rate limiting issues when pulling images
+      # https://docs.docker.com/docker-hub/download-rate-limit/
+      DOCKERHUB_USER:
+        required: true
+      DOCKERHUB_PASSWORD:
+        required: true
+
+jobs:
+  # Builds the image described by the workflow inputs and pushes it.
+  build:
+    permissions:
+      id-token: write
+      contents: read
+      packages: write
+
+    # Only build for pushes and for pull requests that are still open.
+    if: |
+      github.event_name == 'push' ||
+      (github.event_name == 'pull_request' && github.event.pull_request.state == 'open')
+    runs-on: ubuntu-22.04
+    steps:
+      - name: Checkout the code
+        uses: actions/checkout@v3
+
+      # The shared top-level files are copied next to the kernel's Dockerfile
+      # before the build starts.
+      - name: Copy common files
+        run: make copy-common-files
+
+      - name: Log in to Docker Hub
+        uses: docker/login-action@v2
+        with:
+          username: ${{ secrets.DOCKERHUB_USER }}
+          password: ${{ secrets.DOCKERHUB_PASSWORD }}
+
+      - name: Log in to the Container registry
+        uses: docker/login-action@v2
+        with:
+          registry: ghcr.io
+          username: ${{ github.actor }}
+          password: ${{ secrets.GITHUB_TOKEN }}
+
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v2
+        with:
+          version: v0.10.1
+
+      # Note: The outputs in github action will show duplicate labels being generated for the meta outputs.
+      # When the Docker engine builds, it will only take the later values, and our custom labels get added
+      # at the end. https://github.com/docker/metadata-action/issues/125
+      - name: Docker metadata for labels and tags
+        id: meta
+        uses: docker/metadata-action@v4
+        with:
+          images: ${{ inputs.images }}
+          tags: ${{ inputs.tags }}
+          labels: ${{ inputs.labels }}
+
+      - name: Build and push
+        uses: docker/build-push-action@v3
+        with:
+          platforms: ${{ inputs.platforms }}
+          context: ${{ inputs.context }}
+          # Pass the `dockerfile` input through; it was declared by this
+          # workflow but never handed to the build step.
+          # NOTE(review): a caller that sets `context` but relies on the
+          # default `dockerfile` now builds ./Dockerfile - confirm no such
+          # caller exists.
+          file: ${{ inputs.dockerfile }}
+          push: true
+          tags: ${{ steps.meta.outputs.tags }}
+          labels: ${{ steps.meta.outputs.labels }}
+          target: ${{ inputs.target }}
+          cache-from: type=gha
+          cache-to: type=gha,mode=max
+          build-args: ${{ inputs.build_args }}
+
+  # Deletes the GitHub Actions caches stored for the ref of a closed pull request.
+  clear_cache:
+    permissions:
+      contents: read
+      actions: write
+    # If the PR is closed (or merged), we want to clear the cache
+    if: ${{ github.event_name == 'pull_request' && github.event.pull_request.state == 'closed' }}
+    runs-on: ubuntu-latest
+    steps:
+      - name: Check out code
+        uses: actions/checkout@v3
+
+      - name: Cleanup
+        # REPO and BRANCH come from `env` rather than being interpolated into
+        # the script text, and are quoted wherever they are expanded.
+        run: |
+          gh extension install actions/gh-actions-cache
+
+          echo "Fetching list of cache key"
+          cacheKeysForPR=$(gh actions-cache list -R "$REPO" -B "$BRANCH" | cut -f 1)
+
+          ## Setting this to not fail the workflow while deleting cache keys.
+          set +e
+          echo "Deleting caches..."
+          for cacheKey in $cacheKeysForPR
+          do
+            gh actions-cache delete "$cacheKey" -R "$REPO" -B "$BRANCH" --confirm
+          done
+          echo "Done"
+        env:
+          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+          REPO: ${{ github.repository }}
+          BRANCH: ${{ github.ref }}
diff --git a/Makefile b/Makefile
@@ -0,0 +1,6 @@
+# The target names an action rather than a file it produces, so declare it
+# phony; otherwise a file called `copy-common-files` would make it a no-op.
+.PHONY: copy-common-files
+
+# Copy the shared top-level files into kernels/python/.
+copy-common-files:
+	cp requirements.txt kernels/python/
+	cp ipython_config.py kernels/python/
+	cp secrets_helper.py kernels/python/
+	cp git_credential_helper.py kernels/python/
+	cp git-wrapper.sh kernels/python/
diff --git a/git-wrapper.sh b/git-wrapper.sh
@@ -0,0 +1,16 @@
+#!/usr/bin/env bash
+# This script wraps git to only allow certain commands to be run.
+# We mainly want to prevent users from getting into unknown states by checking out other branches, etc.
+#
+# Usage: git-wrapper.sh <git-subcommand> [args...]
+# Exits with status 1 when the first argument is not in the allowed list.
+
+# Allowed command list
+allowed_commands=( "commit" "pull" "push" "status" "diff" "add" "fetch" "log" "version" )
+
+# Check if the command is allowed.
+# Each entry is compared for exact equality: a substring test against the
+# space-joined list would also accept a multi-word argument like "commit pull".
+is_allowed=false
+for allowed in "${allowed_commands[@]}"; do
+    if [[ "${1:-}" == "${allowed}" ]]; then
+        is_allowed=true
+        break
+    fi
+done
+
+if [[ "${is_allowed}" != true ]]; then
+    echo "That git command is not allowed, contact [email protected] if you think this is a mistake."
+    exit 1
+fi
+
+# Otherwise pass through to git at /usr/bin/git
+exec /usr/bin/git "$@"
diff --git a/git_credential_helper.py b/git_credential_helper.py
@@ -0,0 +1,69 @@
+#!/usr/bin/env python3
+"""
+This script is used as a Git credential helper https://git-scm.com/docs/git-credential.
+We iterate through all the git credential secrets on the file system and return the first one that matches the requested URL.
+If no match is found, we return an empty response.
+An empty response will cause Git to use the next credential helper in the list, or prompt the user for credentials.
+To test this script:
+$ cat > /tmp/demo.git-cred <<EOF
+{
+ "meta": {
+ "type": "USERNAME_PASSWORD",
+ "host": "github.com",
+ "protocol": "https",
+ "path": "foo/bar"
+ },
+ "data": {
+ "username": "demo",
+ "password": "demo_password"
+ }
+}
+EOF
+$ export NTBL_SECRETS_DIR=/tmp
+$ echo -e "host=github.com\nprotocol=https\npath=foo/bar" | ./git_credential_helper.py
+username=demo
+password=demo_password
+"""
+
+import json
+from pathlib import Path
+import sys
+import os
+from typing import Optional
+
+
+def parse_input(input_: str) -> dict:
+ """Parse the input from Git into a dictionary."""
+ return dict(line.split("=", 1) for line in input_.splitlines())
+
+
+def format_output(data: dict) -> str:
+ """Format the output to Git."""
+ return "\n".join(f"{key}={value}" for key, value in data.items())
+
+
+def find_secret(input_data: dict) -> Optional[dict]:
+ """Find the secret that matches the input data."""
+ secrets_dir = Path(os.environ.get("NTBL_SECRETS_DIR", "/vault/secrets"))
+ if not secrets_dir.exists():
+ return None
+
+ keys_to_match = ["host", "protocol", "path"]
+ for secret_path in secrets_dir.glob("*.git-cred"):
+ secret_data = json.loads(secret_path.read_text())
+ meta = secret_data["meta"]
+ if all(meta[key] == input_data.get(key) for key in keys_to_match):
+ return secret_data["data"]
+
+ return None
+
+
+def main(stdin=sys.stdin, stdout=sys.stdout):
+ """Main entrypoint."""
+ parsed_input = parse_input(stdin.read())
+ if (secret := find_secret(parsed_input)) is not None:
+ print(format_output(secret), file=stdout)
+
+
+if __name__ == "__main__":
+ main()
diff --git a/ipython_config.py b/ipython_config.py
@@ -0,0 +1,11 @@
+c.InteractiveShellApp.extensions = [
+ "noteable_magics",
+]
+
+c.SqlMagic.feedback = False
+c.SqlMagic.autopandas = True
+c.NTBLMagic.project_dir = "/etc/noteable/project"
+c.NoteableDataLoaderMagic.return_head = False
+c.IPythonKernel._execute_sleep = 0.15
+# 10 minutes to support large files
+c.NTBLMagic.planar_ally_default_timeout_seconds = 600
diff --git a/kernels/python/.pythonrc b/kernels/python/.pythonrc
@@ -0,0 +1,8 @@
+import pandas as pd
+
+import dx
+
+dx.set_option("DISPLAY_MAX_ROWS", 50_000)
+dx.set_option("DISPLAY_MAX_COLUMNS", 100)
+dx.set_option("ENABLE_DATALINK", True)
+dx.set_option("ENABLE_ASSIGNMENT", False)