diff --git a/.envvars b/.envvars new file mode 100644 index 0000000..340e2ae --- /dev/null +++ b/.envvars @@ -0,0 +1,4 @@ +export DOCKER_BUILDKIT=1 +export VERSION="1.2.0" +export NAME_PACKAGE="package_${VERSION}.tar.gz" +export URL_PACKAGE="https://rostlab.org/public/predictprotein-docker/package/${NAME_PACKAGE}" diff --git a/.gitignore b/.gitignore index e69de29..ce436da 100644 --- a/.gitignore +++ b/.gitignore @@ -0,0 +1,2 @@ +package/** +!package/.gitkeep diff --git a/Dockerfile b/Dockerfile index 96ca4bb..b6b4c4e 100644 --- a/Dockerfile +++ b/Dockerfile @@ -3,41 +3,17 @@ # greater than jessie FROM debian:jessie-slim -# Keep and apt from asking questions +# Keep Debian from asking questions ENV DEBIAN_FRONTEND noninteractive # Addresses package install failing due to non-existent man page directory # See https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=863199 RUN mkdir -p /usr/share/man/man1 -# Install public keys for repos -RUN apt-key adv --keyserver keyserver.ubuntu.com --recv-keys FCAE2A0E115C3D8A && \ - apt-key adv --keyserver keyserver.ubuntu.com --recv-keys 228FE7B0D6EBED94 && \ - apt-key adv --keyserver keyserver.ubuntu.com --recv-keys 0xA5D32F012649A5A9 - -# So we can get packages from https repos and some necessary utilities -# By removing /var/lib/apt/lists it reduces the image size, since the apt cache is not stored in a layer. 
-# See https://docs.docker.com/develop/develop-images/dockerfile_best-practices/#run -RUN apt-get update && \ - apt-get install -y \ - apt-transport-https \ - bzip2 \ - ca-certificates \ - gcc \ - locales \ - patch \ - wget \ - xz-utils && \ - localedef -i en_US -c -f UTF-8 -A /usr/share/locale/locale.alias en_US.UTF-8 && \ - rm -rf /var/lib/apt/lists/* - -# New LetsEncrypt Root Certs -COPY /config/etc/ssl/certs/*.pem /etc/ssl/certs/ -RUN sed -i '/^mozilla\/DST_Root_CA_X3.crt$/ s/^/!/' /etc/ca-certificates.conf && \ - update-ca-certificates - -ENV LANG en_US.UTF-8 -ENV LC_ALL en_US.UTF-8 +# Create directory for perlbrew perl version needed to run some methods +# and copy perlbrew-provided compressed perl version +RUN mkdir -p /usr/share/profphd/prof/perl5/dists/ +COPY /package/perlbrew/perl-5.10.1.tar.bz2 /usr/share/profphd/prof/perl5/dists/perl-5.10.1.tar.bz2 # Add necessary repos COPY /config/etc/apt/*.conf /etc/apt/ @@ -45,26 +21,29 @@ COPY /config/etc/apt/preferences.d/*.pref /etc/apt/preferences.d/ COPY /config/etc/apt/sources.list.d/*.list /etc/apt/sources.list.d/ COPY /config/etc/apt/apt.conf.d/* /etc/apt/apt.conf.d/ -# Copy necessary packages no longer available through repos -COPY /package/pp-cache-mgr/libboost/*.deb /tmp/ -COPY /package/pp-cache-mgr/libicu48/*.deb /tmp/ -RUN dpkg -i /tmp/libboost-system1.49.0_1.49.0-3.2_amd64.deb && \ - dpkg -i /tmp/libboost-filesystem1.49.0_1.49.0-3.2_amd64.deb && \ - dpkg -i /tmp/libboost-program-options1.49.0_1.49.0-3.2_amd64.deb && \ - dpkg -i /tmp/libicu48_4.8.1.1-12+deb7u7_amd64.deb && \ - dpkg -i /tmp/libboost-regex1.49.0_1.49.0-3.2_amd64.deb && \ - rm -f /tmp/*.deb - -# Update and install predictprotein from APT repos -RUN apt-get -o "Acquire::https::Verify-Peer=false" update && \ - apt-get -o "Acquire::https::Verify-Peer=false" install -y --allow-unauthenticated rostlab-debian-keyring && \ - apt-get -o "Acquire::https::Verify-Peer=false" install -y \ - librg-pp-bundle-perl \ - pp-cache-mgr \ - predictprotein \ - 
predictprotein-nonfree \ - profdisis && \ - rm -rf /var/lib/apt/lists/* +# Add necessary debian packages +COPY /package/pp-cache-mgr/libboost/*.deb /var/tmp/ +COPY /package/pp-cache-mgr/libicu48/*.deb /var/tmp/ +COPY /package/system/*.deb /var/tmp/ + +# Main installation of all .deb files, including predictprotein. +# Doing this is an attempt at relying less on external sources for +# the Docker image build. +RUN dpkg --unpack --force-all /var/tmp/*.deb && \ + dpkg --configure -a && \ + rm /var/tmp/*.deb && \ + rm -rf /var/lib/apt/lists/* && \ + rm /usr/share/profphd/prof/perl5/dists/perl-5.10.1.tar.bz2 && \ + localedef -i en_US -c -f UTF-8 -A /usr/share/locale/locale.alias en_US.UTF-8 + +ENV LANG en_US.UTF-8 +ENV LC_ALL en_US.UTF-8 + +# New LetsEncrypt Root Certs - otherwise, loctree3 and metastudent data files +# won't be able to be retrieved. +COPY /config/etc/ssl/certs/*.pem /etc/ssl/certs/ +RUN sed -i '/^mozilla\/DST_Root_CA_X3.crt$/ s/^/!/' /etc/ca-certificates.conf && \ + update-ca-certificates # Now that the packages are installed, copy configs and make necessary ones # available to docker hosts for configuring external services. diff --git a/README.md b/README.md index 2f6eb71..7aa9356 100644 --- a/README.md +++ b/README.md @@ -3,7 +3,9 @@ # predictprotein Docker --- -The Docker image this creates is the exact version that runs on our cluster at [Rostlab](https://rostlab.org/) and the official [PredictProtein website](https://open.predictprotein.org/) hosted at the [Luxembourg Centre for Systems Biomedicine](https://wwwfr.uni.lu/lcsb) by [ELIXIR-LU](https://elixir-luxembourg.org/), the Luxembourgish node of ELIXIR, the European infrastructure for life science information. +**NEW AS OF 2022-10-10:** A Docker image of this repository is now available at [our Rostlab Quay `predictprotein-docker` Docker image repository](https://registry.rostlab.org/repository/rostlab/predictprotein-docker). 
That means it is no longer required to build `predictprotein-docker` manually (i.e. using `docker build`) using this repository; although you still may, if you wish. Since third-party repositories may go offline at any time or not renew their encryption keys, effort has gone into relying on external repositories and sources as little as possible in order to better preserve a reproducible and functioning `predictprotein-docker` image. + +The Docker image this creates is the exact version that runs at the official [PredictProtein website](https://predictprotein.org/) hosted at the [Luxembourg Centre for Systems Biomedicine](https://wwwfr.uni.lu/lcsb) by [ELIXIR-LU](https://elixir-luxembourg.org/), the Luxembourgish node of ELIXIR, the European infrastructure for life science information. In an effort to preserve the current (as of 2020-09-08) version of predictprotein, including all of the methods it uses, this Dockerfile and its supporting packages, configuration files, and scripts, have been created, which will allow predictprotein to be able to be run from any current and (hopefully) future Docker-supported operating system. @@ -39,11 +41,13 @@ Don't do anything with the file just yet - [futher instructions are below](#pred After you've cloned this repository, you should be able to in to its directory, and run: ```shell -$ export DOCKER_BUILDKIT=1 -$ docker build -t predictprotein . +$ ./build.sh ``` -See [Build images with BuildKit](https://docs.docker.com/develop/develop-images/build_enhancements/) for more information about Docker BuildKit. +In order to build the Docker image, necessary packages will be downloaded from our servers at `rostlab.org` and extracted by `build.sh`, which are then installed in the resulting `predictprotein` image. This was done for three reasons: +1. "Free" tiers of repository hosting services - GitHub, Gitlab, Bitbucket, and so on - do not provide enough repository space to store all of the necessary packages +2. 
Some of the third-party repositories are not extending/updating their encryption keys, letting them expire, thereby creating errors during the image build process +3. Since many of the packages are relatively old and/or unmaintained, third-party repositories may eventually take them offline, making our Dockerfile unable to be built **This will take a long time** - Go grab a coffee, a snack, a walk in the park, or repair a hole in a sock (or two).
diff --git a/build.sh b/build.sh
new file mode 100755
index 0000000..9770266
--- /dev/null
+++ b/build.sh
@@ -0,0 +1,94 @@
+#!/bin/sh
+
+# 20221011 TAK
+# This script downloads the necessary files for building the predictprotein Docker image
+# since the packages result in the git repository being too large for free plans on
+# various well-known public repositories such as GitHub, Bitbucket and Gitlab
+#
+# Usage: run ./build.sh from the root of the repository clone; .envvars, package/
+# and the Docker build context are all resolved relative to the current directory.
+# Exit status: 0 if the image was built, 1 on any failure.
+#
+# The interpreter is /bin/sh, so only POSIX constructs are used. In particular,
+# redirect with '> /dev/null 2>&1': a POSIX shell parses the bash-only '&>' as
+# '&' (run in background) followed by '>', which makes every check succeed.
+
+# Default values
+VERSION="1.2.0"
+NAME_PACKAGE="package_${VERSION}.tar.gz"
+URL_PACKAGE="https://rostlab.org/public/predictprotein-docker/package/${NAME_PACKAGE}"
+
+# Override defaults in this file, if it is present
+if [ -f ./.envvars ]; then
+    . ./.envvars
+fi
+
+# Print each argument as one line on stderr, then abort with exit status 1
+die() {
+    printf '%s\n' "$@" >&2
+    exit 1
+}
+
+# Succeed if the command named by $1 is available
+have() {
+    command -v "$1" > /dev/null 2>&1
+}
+
+# Check if necessary package directory exists
+if [ ! -d package/system ]; then
+    echo "Necessary system build packages not found."
+    # Directory package/system doesn't exist, see if the package archive exists;
+    # If not, attempt to download it.
+    if [ ! -f "${NAME_PACKAGE}" ]; then
+        echo "Attempting to download and extract packages archive '${NAME_PACKAGE}'..."
+        # The download is piped straight into tar, so no archive file is written to disk.
+        if have wget; then
+            wget -c "${URL_PACKAGE}" -O - | tar -xz ||
+                die "Error!" "Downloading or extracting ${URL_PACKAGE} failed."
+        elif have curl; then
+            # -f: fail on HTTP errors rather than piping an error page into tar
+            # -L: follow redirects
+            curl -fL "${URL_PACKAGE}" | tar -xz ||
+                die "Error!" "Downloading or extracting ${URL_PACKAGE} failed."
+        else
+            die "Error!" \
+                "Unable to download necessary package files for Dockerfile build. You need to install either 'wget' or 'curl'." \
+                "Alternatively, you can manually download the necessary file to the current directory from: ${URL_PACKAGE}"
+        fi
+        echo "Done."
+    else
+        # Here if the package/system directory doesn't exist, but the package archive does.
+        # Possible if archive is manually downloaded, for example
+        printf '%s' "Extracting ${NAME_PACKAGE}..."
+        tar -xzf "${NAME_PACKAGE}" || die "Error!" "Unable to extract ${NAME_PACKAGE}."
+        echo "Done."
+        # Do not keep the package archive; otherwise, it will end up in our build context adding unnecessary size.
+        printf '%s' "Removing ${NAME_PACKAGE}..."
+        rm -- "${NAME_PACKAGE}"
+        echo "Done."
+    fi
+    # Whatever tar reported, the build cannot work without the extracted packages
+    [ -d package/system ] ||
+        die "Error!" "Directory 'package/system' is still missing after extracting the package archive."
+fi
+
+# We're here if the package/system directory exists
+# Make sure package archive is removed
+if [ -f "${NAME_PACKAGE}" ]; then
+    rm -- "${NAME_PACKAGE}"
+fi
+
+# Without docker there is nothing to build; report that instead of exiting quietly
+have docker ||
+    die "Error!" "The 'docker' command was not found; please install Docker and run this script again."
+
+echo "Attempting to build docker image 'predictprotein' from Dockerfile (package version ${VERSION})."
+if docker build -t predictprotein .; then
+    echo "Finished! You may now use the 'docker run' command to start a Docker container."
+    echo "Please refer to the predictprotein-docker README.md and docker-run documentation for usage information."
+else
+    die "There was an error building the Docker image! If you can't resolve the problem on your own, please" \
+        "contact us, providing adequate information in order to assist you, at help@predictprotein.org"
+fi
+
+exit 0
diff --git a/package/.gitkeep b/package/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/package/pp-cache-mgr/libboost/libboost-filesystem1.49.0_1.49.0-3.2_amd64.deb b/package/pp-cache-mgr/libboost/libboost-filesystem1.49.0_1.49.0-3.2_amd64.deb deleted file mode 100644 index 8f3900b..0000000 Binary files a/package/pp-cache-mgr/libboost/libboost-filesystem1.49.0_1.49.0-3.2_amd64.deb and /dev/null differ diff --git a/package/pp-cache-mgr/libboost/libboost-program-options1.49.0_1.49.0-3.2_amd64.deb b/package/pp-cache-mgr/libboost/libboost-program-options1.49.0_1.49.0-3.2_amd64.deb deleted file mode 100644 index 0a0fb29..0000000 Binary files a/package/pp-cache-mgr/libboost/libboost-program-options1.49.0_1.49.0-3.2_amd64.deb and /dev/null differ diff --git a/package/pp-cache-mgr/libboost/libboost-regex1.49.0_1.49.0-3.2_amd64.deb
b/package/pp-cache-mgr/libboost/libboost-regex1.49.0_1.49.0-3.2_amd64.deb deleted file mode 100644 index b788afb..0000000 Binary files a/package/pp-cache-mgr/libboost/libboost-regex1.49.0_1.49.0-3.2_amd64.deb and /dev/null differ diff --git a/package/pp-cache-mgr/libboost/libboost-system1.49.0_1.49.0-3.2_amd64.deb b/package/pp-cache-mgr/libboost/libboost-system1.49.0_1.49.0-3.2_amd64.deb deleted file mode 100644 index 53342e2..0000000 Binary files a/package/pp-cache-mgr/libboost/libboost-system1.49.0_1.49.0-3.2_amd64.deb and /dev/null differ diff --git a/package/pp-cache-mgr/libicu48/libicu48_4.8.1.1-12+deb7u7_amd64.deb b/package/pp-cache-mgr/libicu48/libicu48_4.8.1.1-12+deb7u7_amd64.deb deleted file mode 100644 index ae3063a..0000000 Binary files a/package/pp-cache-mgr/libicu48/libicu48_4.8.1.1-12+deb7u7_amd64.deb and /dev/null differ