Custom notebooks #150
It's not clear here that the YAML config file must be in the image. That is the case, right?
Also, the file is expected to be at a particular location: https://github.com/bird-house/birdhouse-deploy/pull/150/files#diff-17e9b2e274b97a022f797d1f221f2b50144c0ce3b70537a9faa2b7412b2a2cafR159
---
Well, in our case we run this script on an image, so the config must be on the same image, yes. But I suppose the script could also be used directly on the host, without using any image. Since our use case here is to run it on a Docker image, I should add a clarification that the config file must be on the image too. :P
Also, when building the Docker image, the script has to be copied to the same path that is written in the job command (as in the link you posted).
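To illustrate that path contract (a sketch only; the image name and paths here are hypothetical placeholders, not necessarily the ones from this PR):

```sh
# Build time: the specific image's Dockerfile copies the script and its config
# to fixed paths. Run time: the cronjob command must use those exact same paths.
docker run --rm crim-jupyter-eo:latest \
    /usr/local/bin/deploy-data-specific-image /notebook_config.yml
```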
---
Hmm, OK, I agree that this script doesn't care where the config file is located, since it is passed as an argument.
In fact, what is needed is a better place to document the location of the config file in the images. All images will require this config file at a specific location so that the job (my link) can find it.
---
Download from GitHub using an `svn` client? I didn't know that could be done! But why not just use a `git` client?

The bigger question is that this script seems to do very similar work to the existing `deploy-data` script. It's not clear what feature is missing, so why can't we re-use the existing `deploy-data` script? If a feature is missing, can we add it instead of forking another script?

---
They do have a lot of similarities. I could give it a try and use the `deploy-data` script directly, but I am afraid of eventually hitting a blocking point if we want to add different features to one of the use cases.
They are almost identical for now, though, if not the same.
Also, I see that the `deploy-data` script requires Docker, which means we would have to install Docker on `pavics-jupyter-base` (this would replace the yq/jq installation, actually). I don't know what is best here if we want to keep those images to the bare minimum.
@dbyrns I would be curious about your input on this :)
---
I don't have enough information on what @tlvu would like you to reuse to make that call. If he could show us how it could easily be done, I'm not against it, as long as we don't have to invest another couple of days in it. The same applies to the other reuse request; maybe you could talk directly to each other so that it can be done fast.
---
I think the main difference between our scripts is that the `deploy-data` script is meant to run on a generic image that includes Docker and Git, while our new script `deploy-data-specific-image` is meant to be run directly on one of our own specific images, which includes our config.yaml file.

A benefit of the new script is that it lets us keep the YAML file directly in the specific image's repo, close to its related context. For example, if a developer wants to include a new folder for the crim-eo environment, they just go to the crim-eo repo and change the config there.

I am not sure we could do this easily with the `deploy-data` script? I think the YAML files are stored directly in the birdhouse-deploy repo, such as `deploy_raven_testdata_to_thredds.env` and `deploy-data-raven-testdata-to-thredds.yml`. They are then added as a job in the `env.local` file:

birdhouse-deploy/birdhouse/env.local.example, line 177 in 57a320c
---
Now that I have more time to digest this change, I would suggest moving this `deploy-data-specific-image` script into the jupyter-pavics-base image, so that the script and the config YAML (which is in the child image) are part of the same final image.

Why? So that eventually Jenkins can also check out all the notebooks and run tests on them, as it currently does with the default Jupyter image. The e2e repo should not be responsible for checking out the notebooks, as it currently is, and this will let that same e2e repo test against different notebooks depending on the Jupyter image. That requires implementing Ouranosinc/PAVICS-e2e-workflow-tests#57, but we can start to lay the groundwork in that direction.

For the scheduler cronjob, this also means you don't even have to volume-mount the script from outside, since it's already inside the image!

As for re-using the existing generic `deploy-data` script, I think it simply boils down to:

- `yq`: run via `docker run` (currently) or as installed (your case); will have to add a new switch for that.
- `git pull` (currently) or a tar/zip archive download: will have to add a new switch for that as well. Note that going the tar/zip archive route nullifies the caching provided by `git pull`. However, the `git pull` way wastes more space, because there is basically a double checkout: one used for caching and one in `/data/jupyter_user_data/tutorial-notebooks/...`. Each side has its pros and cons, so for genericity and re-usability's sake, we can implement both.

In a different PR, once `deploy-data` has these additional modes of operation, jupyter-pavics-base will wget the script as part of the build and make it available inside the image, just as `deploy-data-specific-image` is now, without it being directly committed as `deploy-data-specific-image`.

---
Could we just create a script that loops over the DOCKER_NOTEBOOK_IMAGES env var (https://github.com/bird-house/birdhouse-deploy/blob/master/birdhouse/env.local.example#L214) and performs this operation on each of them?
So, if I want to offer the eo image, I add it to the available-images env var and voilà! I also get all its notebooks.
---
Do you mean automatically creating a job for each image found in DOCKER_NOTEBOOK_IMAGES?
I suppose we could; most of the parameters stay the same, with only the name (such as eo, nlp, etc.) changing.
But do we always want to create a job for all of those images? For example, since I don't want notebooks specific to pavics/workflow-tests, I don't want a job for that image. But I guess we could organize those images differently, like having another variable that contains the list of the images for which we actually want to run the script.
Or did you mean a single job that runs the script for each image?
---
I mean a single job running a script that updates all images: basically what you already have, but inside a loop (see the sketch below). Apart from the image name, everything else is the same.
And yes, for all images found in DOCKER_NOTEBOOK_IMAGES. Right now we have workflow-tests, but the name is misleading, because it's the primary image used by almost everyone! In fact, all images in DOCKER_NOTEBOOK_IMAGES are available to users, so yes, I think we want to keep them all updated.
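Roughly something like this (a sketch only; the script's exact arguments and path are made up here):

```sh
# One job, one loop: refresh the notebooks of every image listed in the
# DOCKER_NOTEBOOK_IMAGES env var (a space-separated list of image references).
for image in $DOCKER_NOTEBOOK_IMAGES; do
    # Derive the short name ("eo", "nlp", ...) from the image reference,
    # stripping the tag and the registry/organization prefix.
    name="$(basename "${image%%:*}")"
    docker run --rm "$image" \
        /usr/local/bin/deploy-data-specific-image /notebook_config.yml "$name"
done
```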
---
Following my previous comment regarding this config: given that this file, `/notebook_config.yml.template`, is in the image, why is it a `.template`?

---
This "cronjob" is again pretty much similar to the existing https://github.com/bird-house/birdhouse-deploy/blob/57a320c3583f804838e8c18aeb064266527a0bc9/birdhouse/components/scheduler/deploy_data_job.env. Same question, if a feature is missing, can we add to it than forking it?
This one you might not be aware since it's under the "scheduler" component because it has to be used with that component.
Using that "generic" cronjob allows a much simplified cronjob definition like this https://github.com/bird-house/birdhouse-deploy/blob/57a320c3583f804838e8c18aeb064266527a0bc9/birdhouse/components/scheduler/deploy_raven_testdata_to_thredds.env
---
So, I used the word "template" because this version of the config contains the variable ${JUPYTERHUB_USER_DATA_DIR}, which gets replaced by its value when running the script. We make a copy of the config file using `envsubst < $TEMPLATE_CONFIG_YML > $CONFIG_YML`, where the copy has the real value instead of the variable (see the sketch below). I agree 'template' is not the best word here, though; I don't know if you have a better idea.
Regarding your comment about the dest_dir variable found in the YAML config: I could remove it, and we could always output the notebooks to the same directory, ${JUPYTERHUB_USER_DATA_DIR}/tutorial-notebooks/${IMAGE_NAME}.
We could remove that customization option, which would also remove my need for a copy of the config with variables to replace.
If we decide to remove the dest_dir option, I will remove all mentions of .template and remove the envsubst command from the script.
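For reference, the substitution step works roughly like this (a sketch; the `CONFIG_YML` output path and the example value of `JUPYTERHUB_USER_DATA_DIR` are illustrative):

```sh
# The template baked into the image still contains the literal string
# "${JUPYTERHUB_USER_DATA_DIR}"; envsubst writes a copy with the real value.
TEMPLATE_CONFIG_YML=/notebook_config.yml.template
CONFIG_YML=/notebook_config.yml
export JUPYTERHUB_USER_DATA_DIR=/data/jupyter_user_data
envsubst < "$TEMPLATE_CONFIG_YML" > "$CONFIG_YML"
```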
---
OK, it solves two issues and makes the whole thing simpler, so I would remove the dest_dir option from the config.
But I would not use the directory ${JUPYTERHUB_USER_DATA_DIR}/tutorial-notebooks/${IMAGE_NAME}. This is where the notebooks need to be on the host, but inside the image they could be anywhere, like /tmp, as long as the calling command mounts the host volume at that point. My suggestion still holds: in the docker command, include the volume mount and set an environment variable telling the script where to put the files (the dest_dir, but as an env var rather than a config entry); see the sketch below.
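Something along these lines (a sketch; the `DEST_DIR` variable name and the container-side path are placeholders):

```sh
# Mount the host target directory into the container at an arbitrary path,
# then tell the script where to write through an environment variable.
docker run --rm \
    -v "${JUPYTERHUB_USER_DATA_DIR}/tutorial-notebooks/${IMAGE_NAME}:/tmp/notebooks" \
    -e DEST_DIR=/tmp/notebooks \
    "$IMAGE" /usr/local/bin/deploy-data-specific-image /notebook_config.yml
```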
---
Again, now that I have more time to digest this PR...

I would suggest moving this sample big blob of code that generates the cronjob into the jupyter-pavics-base repo as well, so it sits together with the `deploy-data-specific-image` script it wraps.

- This keeps `env.local.example` shorter and less intimidating.
- If this later switches to the generic `deploy-data` script, the matching generic cronjob `components/scheduler/deploy_data_job.env` will also have to be adapted for whatever newer options `deploy-data` will support, and we still keep the same consistency: the "script" and the "cronjob wrapper" are together.

Try to take inspiration from the existing generic cronjob wrapper, especially the part at birdhouse-deploy/birdhouse/components/scheduler/deploy_data_job.env, lines 64 to 67 in 57a320c (`./pavics-compose.sh` invoked manually on the console vs. invoked inside the scheduler component).

---
Forgot to say: once you move the "cronjob generation code" out to jupyter-pavics-base, you can reference it back in `env.local.example` to "advertise" it, like birdhouse-deploy/birdhouse/env.local.example, lines 171 to 181 in 57a320c.