Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Jenkins: make it possible to run notebooks from external repos programmatically #138

Open
wants to merge 18 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
18 commits
Select commit Hold shift + click to select a range
e594d24
downloadrepos: allow to be used as library as well
tlvu May 14, 2024
5216bca
downloadrepos: only download repos that will be tested
tlvu May 14, 2024
57936f5
testall: make processing here available to other repos via downloadrepos
tlvu May 14, 2024
cccae70
testall: make available all functions in downloadrepos to CONFIG_PARA…
tlvu May 14, 2024
b84da18
Jenkins: all artifacts are now under buildout/ so easier to add new n…
tlvu May 14, 2024
cfe5f95
testall: git clean before CONFIG_PARAMETERS_SCRIPT_URL to avoid wipin…
tlvu May 14, 2024
7253e6a
runtest: allow to override --nbval-sanitize-with from CONFIG_OVERRIDE…
tlvu May 14, 2024
d93b229
downloadrepos: avoid set +x because it hides all subsequent commands …
tlvu May 14, 2024
9160053
jenkins sample override: demo runnings notebooks from an external repo
tlvu May 14, 2024
25c0abb
runtest: CONFIG_PARAMETERS_SCRIPT_URL can override DEFAULT_PRODUCTION…
tlvu May 14, 2024
0182490
runtest: add repo and branch name to archived nbs name to id which re…
tlvu May 16, 2024
1cf434e
Merge remote-tracking branch 'origin/master' into make-it-easier-to-a…
tlvu Jun 5, 2024
a20ef26
Merge remote-tracking branch 'origin/master' into make-it-easier-to-a…
tlvu Oct 31, 2024
36fba3d
testall: try to avoid delete_files_confusing_pytest by using py.test …
tlvu Oct 31, 2024
5f13ec9
runtest: allow saved files under buildout/ to have hierarchy
tlvu Oct 31, 2024
c9c8d50
tests: allow to override artifact nb filename format archived by Jenkins
tlvu Oct 31, 2024
7fce738
runtest: allow overrideable post-processing steps
tlvu Oct 31, 2024
d0265f3
test artifacts: shorter filename format, name clash already handled
tlvu Oct 31, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 1 addition & 10 deletions Jenkinsfile
Original file line number Diff line number Diff line change
Expand Up @@ -144,16 +144,7 @@ Note this is another run, will double the time and no guaranty to have same erro

post {
always {
archiveArtifacts(artifacts: 'notebooks/*.ipynb', fingerprint: true)
archiveArtifacts(artifacts: 'pavics-sdi-*/docs/source/notebooks/*.ipynb', fingerprint: true)
archiveArtifacts(artifacts: 'pavics-sdi-*/docs/source/notebook-components/*.ipynb', fingerprint: true)
archiveArtifacts(artifacts: 'finch-*/docs/source/notebooks/*.ipynb', fingerprint: true)
archiveArtifacts(artifacts: 'raven-*/docs/source/notebooks/*.ipynb', fingerprint: true)
archiveArtifacts(artifacts: 'RavenPy-*/docs/notebooks/*.ipynb', fingerprint: true)
archiveArtifacts(artifacts: 'RavenPy-*/docs/notebooks/paper/*.ipynb', fingerprint: true)
archiveArtifacts(artifacts: 'esgf-compute-api-*/examples/*.ipynb', fingerprint: true)
archiveArtifacts(artifacts: 'PAVICS-landing-*/content/notebooks/climate_indicators/*.ipynb', fingerprint: true)
archiveArtifacts(artifacts: 'buildout/*.output.ipynb', fingerprint: true, allowEmptyArchive: true)
archiveArtifacts(artifacts: 'buildout/**/*.ipynb', fingerprint: true, allowEmptyArchive: true)
archiveArtifacts(artifacts: 'buildout/env-dump/', fingerprint: true)
}
unsuccessful { // Run if the current builds status is "Aborted", "Failure" or "Unstable"
Expand Down
124 changes: 111 additions & 13 deletions downloadrepos
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
#!/bin/sh
# This file can be used both as executable script or library to be sourced.
# To use as library to be sourced, set DOWNLOADREPOS_AS_LIB=1 env var.

downloadrepos() {
github_repo="$1"; shift
Expand All @@ -13,25 +15,121 @@ downloadgithubrepos() {
repo_owner="`echo "$owner_and_repo_name" | sed "s@/.*\\$@@g"`"
repo_name="`echo "$owner_and_repo_name" | sed "s@^.*/@@g"`"
repo_branch="$1"; shift
set -x
# clean up other previously downloaded branches of the same repo as well
rm -rf ${repo_name}-*
ls | grep $repo_name
downloadrepos https://github.com/$repo_owner/$repo_name "$repo_branch"
ls | grep $repo_name
set +x
}

. ./default_build_params
# Print the first argument converted to lowercase on stdout.
# USAGE: VAR_TO_LOWER="$(lowercase "$VAR_TO_LOWER")"
lowercase() {
    # printf instead of echo: echo would treat values such as '-n' or '-e'
    # as options (and some sh implementations interpret backslashes).
    printf '%s\n' "$1" | tr '[:upper:]' '[:lower:]'
}

# Normalize every boolean-like build parameter to lowercase, in place, so
# later comparisons against the literal 'true' are case-insensitive.
# Each variable is passed through lowercase() and re-assigned.
lowercase_boolean_build_params() {
    # Which external repos to test.
    TEST_PAVICS_SDI_REPO="$(lowercase "${TEST_PAVICS_SDI_REPO}")"
    TEST_FINCH_REPO="$(lowercase "${TEST_FINCH_REPO}")"
    TEST_PAVICS_LANDING_REPO="$(lowercase "${TEST_PAVICS_LANDING_REPO}")"
    TEST_RAVEN_REPO="$(lowercase "${TEST_RAVEN_REPO}")"
    TEST_RAVENPY_REPO="$(lowercase "${TEST_RAVENPY_REPO}")"
    TEST_ESGF_COMPUTE_API_REPO="$(lowercase "${TEST_ESGF_COMPUTE_API_REPO}")"

    # Remaining test toggles.
    TEST_MAGPIE_AUTH="$(lowercase "${TEST_MAGPIE_AUTH}")"
    TEST_PAVICS_SDI_WEAVER="$(lowercase "${TEST_PAVICS_SDI_WEAVER}")"
    TEST_LOCAL_NOTEBOOKS="$(lowercase "${TEST_LOCAL_NOTEBOOKS}")"
}

# Turn a branch name into something usable as a folder name: every slash (/)
# becomes a dash (-), since (/) is illegal in a folder name.
# Ex: "feature/my_wizbang-feature" becomes "feature-my_wizbang-feature".
# Github does the same when downloading repo archive by downloadrepos above.
# USAGE: export BRANCH_NAME="$(sanitize_branch_name "$BRANCH_NAME")"
sanitize_branch_name() {
    echo "$1" | tr '/' '-'
}

# Print only the repo name part of an "owner/repo" string, i.e. everything
# after the last slash. Input without any slash is printed unchanged.
# Ex: 'Ouranosinc/pavics-sdi' gives 'pavics-sdi'.
# USAGE: REPO_NAME_ONLY="$(extract_repo_name "$REPO_NAME")"
extract_repo_name() {
    # Greedy '.*/' consumes up to and including the last slash on the line.
    echo "$1" | sed -e 's|.*/||'
}

# Print the given folder name with every character other than alphanum,
# '-', '_' and '.' converted to '-'.
# Branches that have allowed characters such as '+' get those converted to
# '-' in archives, so the extracted folder name has to be computed the same way.
# USAGE: FOLDER_NAME="$(sanitize_extracted_folder_name "$FOLDER_NAME")"
sanitize_extracted_folder_name() {
    # Inside a bracket expression a backslash is a literal character, not an
    # escape, so '-' is kept literal by placing it last and '.' needs no
    # escaping. printf avoids echo's option/backslash handling.
    printf '%s\n' "$1" | sed 's@[^a-zA-Z0-9_.-]@-@g'
}

downloadrepos_main() {
. ./default_build_params

lowercase_boolean_build_params

if [ -z "$DOWNLOAD_ALL_DEFAULT_REPOS" ]; then
# Back-compat with old default behavior, used in binder/reorg-notebook
# and other external scripts that autodeploy tutorial notebooks (see
# https://github.com/bird-house/birdhouse-deploy/blob/444a7c35a31aa8ad351e47f659383ba5c2919705/birdhouse/deployment/trigger-deploy-notebook#L64-L75)
DOWNLOAD_ALL_DEFAULT_REPOS=true
fi
Comment on lines +67 to +72
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think I would rather have the other script set DOWNLOAD_ALL_DEFAULT_REPOS=true if it needs all of them and have the default behavior of skipping unnecessary downloads.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No, that's the point. If the other scripts have to set this new DOWNLOAD_ALL_DEFAULT_REPOS=true it means I break backward-compatibility with the other scripts.

I want no changes to other scripts outside this repo.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Instead, every CI needs to inject the new variable to take advantage of more intelligent download rather than getting it for free. Since birdhouse-deploy does wget to retrieve those scripts, it makes more sense IMO that it updates with new features, or uses an explicit commit hash or tag to ensure the behavior remains consistent.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

every CI needs to inject the new variable to take advantage of more intelligent download

Huh, the only one is this Jenkins or more precisely the testall script. If someone deploys this job on another server, the entrypoint is still the testall script, not this downloadrepos directly.

use an explicit commit hash or tag to ensure the behavior remains consistent.

Using exact commit hash is like pinning everything in your requirements.txt. We do not do this because update will be too tedious, same here.


if [ -z "$1" ]; then
if [ x"$DOWNLOAD_ALL_DEFAULT_REPOS" = xtrue ] || [ x"$TEST_PAVICS_SDI_REPO" = xtrue ]; then
downloadgithubrepos $PAVICS_SDI_REPO $PAVICS_SDI_BRANCH
fi
if [ x"$DOWNLOAD_ALL_DEFAULT_REPOS" = xtrue ] || [ x"$TEST_FINCH_REPO" = xtrue ]; then
downloadgithubrepos $FINCH_REPO $FINCH_BRANCH
fi
if [ x"$DOWNLOAD_ALL_DEFAULT_REPOS" = xtrue ] || [ x"$TEST_PAVICS_LANDING_REPO" = xtrue ]; then
downloadgithubrepos $PAVICS_LANDING_REPO $PAVICS_LANDING_BRANCH
fi
if [ x"$DOWNLOAD_ALL_DEFAULT_REPOS" = xtrue ] || [ x"$TEST_RAVEN_REPO" = xtrue ]; then
downloadgithubrepos $RAVEN_REPO $RAVEN_BRANCH
fi
if [ x"$DOWNLOAD_ALL_DEFAULT_REPOS" = xtrue ] || [ x"$TEST_RAVENPY_REPO" = xtrue ]; then
downloadgithubrepos $RAVENPY_REPO $RAVENPY_BRANCH
fi
if [ x"$DOWNLOAD_ALL_DEFAULT_REPOS" = xtrue ] || [ x"$TEST_ESGF_COMPUTE_API_REPO" = xtrue ]; then
downloadgithubrepos $ESGF_COMPUTE_API_REPO $ESGF_COMPUTE_API_BRANCH
fi
else
set -x
downloadrepos "$@"
fi
}


# Choose artifact filename format under buildout/ dir that is archived by Jenkins.
#
# The corresponding .output.ipynb will also have the same filename format.
#
# Override this function in CONFIG_OVERRIDE_SCRIPT_URL to choose another
# format, see demo in test-override/jenkins-params-external-repos.include.sh.
#
# The result is "<first path component>--<basename>".
#
# Ex: when given 'pavics-sdi-master/docs/source/notebooks/regridding.ipynb',
# the current implementation will return
# 'pavics-sdi-master--regridding.ipynb', which means
# there will be
# "buildout/pavics-sdi-master--regridding.ipynb" and
# "buildout/pavics-sdi-master--regridding.output.ipynb"
#
# A path without any slash is used as-is for both parts.
#
# USAGE: artifact_filename="$(choose_artifact_filename "$original_nb_filename")"
choose_artifact_filename() {
    # ${1%%/*} drops everything from the first slash onward. No intermediate
    # variable is assigned, so calling this function directly (outside a
    # command substitution) can not clobber a caller's repo_branch variable.
    printf '%s\n' "${1%%/*}--$(basename "$1")"
}


# Hook for post-processing steps, called at the end of runtest.
#
# The default only prints a message. To add extra steps, redefine this
# function in CONFIG_OVERRIDE_SCRIPT_URL,
# see demo in test-override/jenkins-params-external-repos.include.sh.
#
post_runtest() {
    # A shell function body can not be empty, so emit an informational line.
    printf '%s\n' "Default: no post_runtest() override."
}


if [ -z "$1" ]; then
downloadgithubrepos $PAVICS_SDI_REPO $PAVICS_SDI_BRANCH
downloadgithubrepos $FINCH_REPO $FINCH_BRANCH
downloadgithubrepos $PAVICS_LANDING_REPO $PAVICS_LANDING_BRANCH
downloadgithubrepos $RAVEN_REPO $RAVEN_BRANCH
downloadgithubrepos $RAVENPY_REPO $RAVENPY_BRANCH
downloadgithubrepos $ESGF_COMPUTE_API_REPO $ESGF_COMPUTE_API_BRANCH
else
set -x
downloadrepos "$@"
if [ -z "$DOWNLOADREPOS_AS_LIB" ]; then
# Script mode, not library mode.
downloadrepos_main "$@"
fi
51 changes: 36 additions & 15 deletions runtest
Original file line number Diff line number Diff line change
@@ -1,6 +1,11 @@
#!/bin/sh

DEFAULT_PRODUCTION_HOST="pavics.ouranos.ca"
# Load shared functions, make available to CONFIG_OVERRIDE_SCRIPT_URL.
DOWNLOADREPOS_AS_LIB=1
. ./downloadrepos

# CONFIG_PARAMETERS_SCRIPT_URL can override DEFAULT_PRODUCTION_HOST.
DEFAULT_PRODUCTION_HOST="${DEFAULT_PRODUCTION_HOST:=pavics.ouranos.ca}"

NOTEBOOKS="$1"
if [ -z "$NOTEBOOKS" ]; then
Expand Down Expand Up @@ -63,11 +68,12 @@ if [ -n "$CONFIG_OVERRIDE_SCRIPT_URL" ]; then
fi
fi

py.test --nbval $NOTEBOOKS --nbval-sanitize-with notebooks/output-sanitize.cfg $PYTEST_EXTRA_OPTS
# CONFIG_OVERRIDE_SCRIPT_URL can override NBVAL_SANITIZE_CFG_FILE.
py.test --rootdir=. --nbval $NOTEBOOKS --nbval-sanitize-with "${NBVAL_SANITIZE_CFG_FILE:=notebooks/output-sanitize.cfg}" $PYTEST_EXTRA_OPTS
EXIT_CODE="$?"

# lowercase SAVE_RESULTING_NOTEBOOK string
SAVE_RESULTING_NOTEBOOK="`echo "$SAVE_RESULTING_NOTEBOOK" | tr '[:upper:]' '[:lower:]'`"
SAVE_RESULTING_NOTEBOOK="$(lowercase "$SAVE_RESULTING_NOTEBOOK")"


# save notebooks resulting from the run
Expand All @@ -79,24 +85,39 @@ SAVE_RESULTING_NOTEBOOK="`echo "$SAVE_RESULTING_NOTEBOOK" | tr '[:upper:]' '[:lo
# work-around as nbval can not save the result of the run
# see https://github.com/computationalmodelling/nbval/issues/112

if [ x"$SAVE_RESULTING_NOTEBOOK" = xtrue ]; then
mkdir -p buildout
for nb in $NOTEBOOKS; do
filename="`basename "$nb"`"
filename="`echo "$filename" | sed "s/.ipynb$//"`" # remove .ipynb ext
if [ -e "buildout/${filename}.output.ipynb" ]; then
# prevent name clash
filename="${filename}_`date '+%s'`"
fi
BUILDOUT_DIR="buildout" # hardcode in Jenkinsfile, can not be override.

mkdir -p "$BUILDOUT_DIR/"
for nb in $NOTEBOOKS; do
filename="$(choose_artifact_filename "$nb")"
filename="$(echo "$filename" | sed "s/.ipynb$//")" # remove .ipynb ext
if [ -e "${BUILDOUT_DIR}/${filename}.ipynb" ]; then
# prevent name clash
filename="${filename}_$(date '+%s')"
fi

# Save original notebooks that we sed replace the PAVICS_HOST.
dir_filename="$(dirname "${filename}")"
if [ -n "${dir_filename}" ]; then
mkdir -p "${BUILDOUT_DIR}/${dir_filename}"
fi
cp "$nb" "$BUILDOUT_DIR/${filename}.ipynb"

if [ x"$SAVE_RESULTING_NOTEBOOK" = xtrue ]; then
# Timeout must not be more than 240s (4 mins). Default in Jenkinsfile.
# Tutorial notebooks should be fast so user do not lose patience waiting
# for them to run. If more than 4 mins, in addition to simplifying the
# notebook, should also check machine performance.
jupyter nbconvert --to notebook --execute \
--ExecutePreprocessor.timeout=${SAVE_RESULTING_NOTEBOOK_TIMEOUT:=240} --allow-errors \
--output-dir buildout --output "${filename}.output.ipynb" "$nb"
done
fi
--output-dir "${BUILDOUT_DIR}" --output "${filename}.output.ipynb" "$nb"
fi
done


# Post-processing steps override in CONFIG_OVERRIDE_SCRIPT_URL.
post_runtest


# exit with return code from py.test
exit $EXIT_CODE
58 changes: 58 additions & 0 deletions test-override/jenkins-params-external-repos.include.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
#!/bin/sh
#
# Sample Jenkins params override script to demonstrate running new notebooks
# from an external repo and on-the-fly CONFIG_OVERRIDE_SCRIPT_URL file creation.
#
# This script is intended for param CONFIG_PARAMETERS_SCRIPT_URL.

# Scenario: we want to run notebooks from an external repo, unknown to current Jenkins config.
# https://github.com/roocs/rook/tree/master/notebooks/*.ipynb

# Disable all existing default repos to avoid downloading them and running them.
TEST_PAVICS_SDI_REPO="false"
TEST_FINCH_REPO="false"
TEST_PAVICS_LANDING_REPO="false"
TEST_LOCAL_NOTEBOOKS="false"

# Set new external repo vars. Need 'export' so CONFIG_OVERRIDE_SCRIPT_URL can see them.
export ROOK_REPO="roocs/rook"
export ROOK_BRANCH="master"

# Not checking for expected output, just checking whether the code can run without errors.
PYTEST_EXTRA_OPTS="$PYTEST_EXTRA_OPTS --nbval-lax"

# Create CONFIG_OVERRIDE_SCRIPT_URL file on-the-fly to run the notebooks from
# our external repo.
# NOTE(review): this is a fixed, predictable path under /tmp; consider mktemp
# if this sample is ever used on a shared host.
CONFIG_OVERRIDE_SCRIPT_URL="/tmp/custom-repos.include.sh"

# Populate the content of our CONFIG_OVERRIDE_SCRIPT_URL.
# The heredoc delimiter is quoted so the content is written verbatim: nothing
# is expanded at generation time and single quotes inside the content are
# preserved (they would terminate a single-quoted echo argument).
cat > "$CONFIG_OVERRIDE_SCRIPT_URL" <<'EOF'
#!/bin/sh
# Sample config override script to run new notebooks from new external repo.

# Replicate processing steps in 'testall' script.

# Download the external repo.
downloadgithubrepos "$ROOK_REPO" "$ROOK_BRANCH"

# Prep vars for including new nbs in nb list to test.
ROOK_REPO_NAME="$(extract_repo_name "$ROOK_REPO")"
ROOK_DIR="$(sanitize_extracted_folder_name "${ROOK_REPO_NAME}-${ROOK_BRANCH}")"

# Set new nbs as nb list to test.
NOTEBOOKS="$ROOK_DIR/notebooks/*.ipynb"

# Sample demo override choose_artifact_filename: keep the original file path hierarchy.
choose_artifact_filename() {
    echo "$1"
}

# Sample demo override post_runtest: create lots of artifacts for Jenkins to
# archive to test how Jenkins will display its archive page.
post_runtest() {
    # 'seq -w' is the short form of --equal-width (zero-padded numbers).
    for i in $(seq -w 500); do
        echo "file${i}" > "${BUILDOUT_DIR}/file${i}.ipynb"
    done
}
EOF
Loading