Skip to content

Commit

Permalink
Initialize project with base kernel (#2)
Browse files Browse the repository at this point in the history
* Add initial builds for python kernel

* Add python version to Dockerfile

* Fix pull requests

* Fix workflow reference

* Fix workflow reference again

* Prefix with ./

* Fix dockerfile / context args

* Add Dockerhub secrets

* Add actions:write

* Set python version as environment variable

* Use build args to set python version

* Disable ARM build

* Use common files layout from legacy project

* Fix makefile script

* Add kernels/ prefix to copy paths

* Fix file paths

* Fix ipython path

* Move environment.txt to kernel

* Reenable arm builds

* Remove julia fix-permissions

* Disable arm builds again

* Remove default build labels

* Remove unused Make scripts

* Remove duplicate linting block

* Add missing newline

* Add comment regarding docker image versions
  • Loading branch information
robwittman authored Feb 10, 2023
1 parent f27c3dd commit 270355d
Show file tree
Hide file tree
Showing 12 changed files with 510 additions and 0 deletions.
57 changes: 57 additions & 0 deletions .github/workflows/build.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
# Lints the repository and builds the Python kernel images for pushes to, and
# pull requests against, the main branch.
name: Build kernel images

on:
  push:
    branches:
      - main
  # NOTE(review): only the default pull_request activity types (opened,
  # synchronize, reopened) trigger this workflow. Any job that is conditioned on
  # a *closed* pull request will never run unless `types` also lists `closed`
  # -- confirm whether that is intended.
  pull_request:
    branches:
      - main

jobs:
  base-linting:
    name: base-linting
    runs-on: ubuntu-22.04

    steps:
      - name: Checkout Code
        uses: actions/checkout@v3

      - name: Lint Dockerfile, Shell scripts, YAML
        uses: github/super-linter@v4
        env:
          # Must name the repository's default branch; this workflow only
          # triggers on `main`, so `master` would point at a missing branch.
          DEFAULT_BRANCH: main
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

          # Linters to enable
          VALIDATE_BASH: true
          VALIDATE_BASH_EXEC: true
          VALIDATE_DOCKERFILE_HADOLINT: true
          VALIDATE_YAML: true

  build_python_kernel:
    # Permissions granted to the jobs of the called reusable workflow.
    permissions:
      id-token: write
      contents: read
      packages: write
      actions: write
    uses: ./.github/workflows/reusable-docker-build.yml
    strategy:
      matrix:
        # Must be a supported version by jupyter/datascience-notebook
        # https://hub.docker.com/r/jupyter/datascience-notebook/tags?page=1&name=python-
        version: ["3.9.13", "3.8.13"]
    secrets: inherit
    with:
      dockerfile: ./kernels/python/Dockerfile
      context: ./kernels/python
      images: |
        ghcr.io/${{ github.repository }}/python
      # Every tag is prefixed with the Python version of the matrix entry.
      tags: |
        type=ref,event=branch,prefix=${{ matrix.version }}
        type=ref,event=pr,prefix=${{ matrix.version }}
        type=sha,format=long,prefix=${{ matrix.version }}
        type=raw,value=latest,enable=${{ github.ref == format('refs/heads/{0}', 'main') }},prefix=${{ matrix.version }}
      build_args: |
        PYTHON_VERSION=${{ matrix.version }}
      platforms: "linux/amd64"
142 changes: 142 additions & 0 deletions .github/workflows/reusable-docker-build.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,142 @@
# Reusable workflow: builds a Docker image with Buildx, pushes it to the
# registries named in `images`, and clears the Actions cache once a pull
# request is closed.
name: docker

on:
  workflow_call:
    inputs:
      dockerfile:
        description: "Path to the Dockerfile to build"
        type: string
        default: Dockerfile
      context:
        description: "The context for Docker build"
        type: string
        default: "."
      platforms:
        description: "Comma separated list of platforms to build on"
        type: string
        required: false
        default: "linux/amd64,linux/arm64"
      images:
        description: "The image names that we want to build"
        type: string
        required: true
      tags:
        description: "The various tags to be attached to the built image"
        type: string
        required: false
        default: ""
      labels:
        description: "The various labels to attach to the built image"
        type: string
        required: false
        default: |
          org.opencontainers.image.url=https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}
          org.opencontainers.image.vendor=Noteable
          org.opencontainers.image.version=${{ github.ref }}
      target:
        description: "Sets the target stage to build"
        type: string
        required: false
      build_args:
        description: "Additional build args to pass to the Docker build"
        type: string
        required: false
        default: ""
    secrets:
      # We login to Dockerhub to prevent rate limiting issues when pulling images
      # https://docs.docker.com/docker-hub/download-rate-limit/
      DOCKERHUB_USER:
        required: true
      DOCKERHUB_PASSWORD:
        required: true

jobs:
  build:
    permissions:
      id-token: write
      contents: read
      packages: write

    # Build on pushes and on pull requests that are still open.
    if: |
      github.event_name == 'push' ||
      (github.event_name == 'pull_request' && github.event.pull_request.state == 'open')
    runs-on: ubuntu-22.04
    steps:
      - name: Checkout the code
        uses: actions/checkout@v3

      - name: Copy common files
        run: make copy-common-files

      - name: Log in to Docker Hub
        uses: docker/login-action@v2
        with:
          username: ${{ secrets.DOCKERHUB_USER }}
          password: ${{ secrets.DOCKERHUB_PASSWORD }}

      - name: Log in to the Container registry
        uses: docker/login-action@v2
        with:
          registry: ghcr.io
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v2
        with:
          version: v0.10.1

      # Note: The outputs in github action will show duplicate labels being generated for the meta outputs.
      # When the Docker engine builds, it will only take the later values, and our custom labels get added
      # at the end. https://github.com/docker/metadata-action/issues/125
      - name: Docker metadata for labels and tags
        id: meta
        uses: docker/metadata-action@v4
        with:
          images: ${{ inputs.images }}
          tags: ${{ inputs.tags }}
          labels: ${{ inputs.labels }}

      - name: Build and push
        uses: docker/build-push-action@v3
        with:
          platforms: ${{ inputs.platforms }}
          context: ${{ inputs.context }}
          # Forward the `dockerfile` input; without this the input is silently
          # ignored and the action falls back to <context>/Dockerfile.
          file: ${{ inputs.dockerfile }}
          push: true
          tags: ${{ steps.meta.outputs.tags }}
          labels: ${{ steps.meta.outputs.labels }}
          target: ${{ inputs.target }}
          cache-from: type=gha
          cache-to: type=gha,mode=max
          build-args: ${{ inputs.build_args }}

  clear_cache:
    permissions:
      contents: read
      actions: write
    # If the PR is closed (or merged), we want to clear the cache
    if: ${{ github.event_name == 'pull_request' && github.event.pull_request.state == 'closed' }}
    runs-on: ubuntu-latest
    steps:
      - name: Check out code
        uses: actions/checkout@v3

      - name: Cleanup
        # REPO and BRANCH are passed through `env` and quoted in the script so
        # that their values are never interpolated into the shell source.
        run: |
          gh extension install actions/gh-actions-cache
          echo "Fetching list of cache key"
          cacheKeysForPR=$(gh actions-cache list -R "$REPO" -B "$BRANCH" | cut -f 1 )
          ## Setting this to not fail the workflow while deleting cache keys.
          set +e
          echo "Deleting caches..."
          for cacheKey in $cacheKeysForPR
          do
            gh actions-cache delete "$cacheKey" -R "$REPO" -B "$BRANCH" --confirm
          done
          echo "Done"
        env:
          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          REPO: ${{ github.repository }}
          BRANCH: ${{ github.ref }}
6 changes: 6 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Copy the shared helper files from the repository root into kernels/python/.
# Declared phony because the target names an action, not a file it produces;
# otherwise a stray file called "copy-common-files" would make it a no-op.
.PHONY: copy-common-files
copy-common-files:
	cp requirements.txt kernels/python/
	cp ipython_config.py kernels/python/
	cp secrets_helper.py kernels/python/
	cp git_credential_helper.py kernels/python/
	cp git-wrapper.sh kernels/python/
16 changes: 16 additions & 0 deletions git-wrapper.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
#!/usr/bin/env bash
# Thin wrapper around git that only lets a fixed set of subcommands through.
# The goal is to keep users out of unknown states (e.g. checking out other branches).

# Subcommands that may be forwarded to the real git binary.
permitted=( "commit" "pull" "push" "status" "diff" "add" "fetch" "log" "version" )

# Space-padded, space-joined list so the first argument can be looked up as " <cmd> ".
permitted_joined=" ${permitted[*]} "

# The quoted right-hand side makes =~ a literal substring test (hence SC2076).
# shellcheck disable=SC2076
if [[ "${permitted_joined}" =~ " ${1} " ]]; then
    # Allowed: replace this process with git at /usr/bin/git, forwarding all arguments.
    exec /usr/bin/git "$@"
fi

# Anything else is rejected.
echo "That git command is not allowed, contact [email protected] if you think this is a mistake."
exit 1
69 changes: 69 additions & 0 deletions git_credential_helper.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
#!/usr/bin/env python3
"""
This script is used as a Git credential helper https://git-scm.com/docs/git-credential.
We iterate through all the git credential secrets on the file system and return the first one that matches the requested URL.
If no match is found, we return an empty response.
An empty response will cause Git to use the next credential helper in the list, or prompt the user for credentials.
To test this script:
$ cat > /tmp/demo.git-cred <<EOF
{
"meta": {
"type": "USERNAME_PASSWORD",
"host": "github.com",
"protocol": "https",
"path": "foo/bar"
},
"data": {
"username": "demo",
"password": "demo_password"
}
}
EOF
$ export NTBL_SECRETS_DIR=/tmp
$ echo -e "host=github.com\nprotocol=https\npath=foo/bar" | ./git_credential_helper.py
username=demo
password=demo_password
"""

import json
from pathlib import Path
import sys
import os
from typing import Optional


def parse_input(input_: str) -> dict:
    """Parse the ``key=value`` lines received from Git into a dictionary.

    Each line is split on the first ``=`` only, so values may themselves
    contain ``=``. Lines without an ``=`` (for example the blank line that may
    terminate the attribute list) are skipped instead of raising ``ValueError``.

    Args:
        input_: The raw text read from stdin.

    Returns:
        A mapping of attribute name to value; empty when nothing parses.
    """
    parsed = {}
    for line in input_.splitlines():
        key, separator, value = line.partition("=")
        if not separator:
            # Blank or malformed line: nothing to record.
            continue
        parsed[key] = value
    return parsed


def format_output(data: dict) -> str:
    """Render a mapping as newline-separated ``key=value`` lines for Git.

    Args:
        data: The attributes to emit, in iteration order.

    Returns:
        One ``key=value`` entry per item joined with newlines, without a
        trailing newline; an empty string for an empty mapping.
    """
    rendered = []
    for name in data:
        rendered.append(f"{name}={data[name]}")
    return "\n".join(rendered)


def find_secret(input_data: dict) -> Optional[dict]:
    """Find the credential secret whose metadata matches the request from Git.

    The secrets directory is taken from ``NTBL_SECRETS_DIR`` (default
    ``/vault/secrets``). Every ``*.git-cred`` file in it is read as JSON with a
    ``meta`` and a ``data`` object; a secret matches when its ``host``,
    ``protocol`` and ``path`` meta values all equal the corresponding request
    values (a key absent on either side compares as ``None``).

    Files that cannot be read, are not valid JSON, or lack a ``meta``/``data``
    object are skipped so that one bad secret does not break every lookup.

    Args:
        input_data: The parsed attributes sent by Git.

    Returns:
        The ``data`` object of the first matching secret, or ``None`` when the
        directory does not exist or nothing matches.
    """
    secrets_dir = Path(os.environ.get("NTBL_SECRETS_DIR", "/vault/secrets"))
    if not secrets_dir.exists():
        return None

    keys_to_match = ["host", "protocol", "path"]
    # Sorted so that "first match" is deterministic; glob order is arbitrary.
    for secret_path in sorted(secrets_dir.glob("*.git-cred")):
        try:
            secret_data = json.loads(secret_path.read_text())
        except (OSError, ValueError):
            # Unreadable, wrongly encoded, or not JSON: ignore this file.
            continue

        if not isinstance(secret_data, dict):
            continue
        meta = secret_data.get("meta")
        data = secret_data.get("data")
        if not isinstance(meta, dict) or not isinstance(data, dict):
            continue

        if all(meta.get(key) == input_data.get(key) for key in keys_to_match):
            return data

    return None


def main(stdin=sys.stdin, stdout=sys.stdout):
    """Read a credential request from ``stdin`` and answer it on ``stdout``.

    The request is parsed and looked up among the stored secrets. When a
    secret matches, it is printed in ``key=value`` form; when none matches,
    nothing is written.
    """
    request = parse_input(stdin.read())
    secret = find_secret(request)
    if secret is None:
        return
    print(format_output(secret), file=stdout)


if __name__ == "__main__":
    main()
11 changes: 11 additions & 0 deletions ipython_config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# IPython configuration, grouped by the class each setting targets.
# NOTE(review): `c` is not defined in this file; it is presumably the config
# object injected by IPython's config loader -- confirm.

# Extensions loaded by the shell application.
c.InteractiveShellApp.extensions = ["noteable_magics"]

# Kernel settings.
c.IPythonKernel._execute_sleep = 0.15

# SqlMagic settings.
c.SqlMagic.autopandas = True
c.SqlMagic.feedback = False

# NTBLMagic settings.
c.NTBLMagic.project_dir = "/etc/noteable/project"
# 10 minutes to support large files
c.NTBLMagic.planar_ally_default_timeout_seconds = 600

# NoteableDataLoaderMagic settings.
c.NoteableDataLoaderMagic.return_head = False
8 changes: 8 additions & 0 deletions kernels/python/.pythonrc
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Startup script: imports pandas and dx, then sets four dx options.
# NOTE(review): `pd` is not referenced below; presumably it is imported so the
# alias is already available in the session -- confirm before removing.
import pandas as pd

import dx

# Row and column limits for dx's display.
dx.set_option("DISPLAY_MAX_ROWS", 50_000)
dx.set_option("DISPLAY_MAX_COLUMNS", 100)
# Feature toggles: datalink on, assignment off.
dx.set_option("ENABLE_DATALINK", True)
dx.set_option("ENABLE_ASSIGNMENT", False)
Loading

0 comments on commit 270355d

Please sign in to comment.