diff --git a/dockerfiles/README.md b/dockerfiles/README.md index 8d0e0ed5f..0146e8816 100644 --- a/dockerfiles/README.md +++ b/dockerfiles/README.md @@ -1,5 +1,18 @@ Dockerfiles for Skosmos. +## Prerequisites + +The following software versions were tested successfully with +the docker configuration files used in this document. + +- Ubuntu Linux jammy 22.04.3 LTS +- Docker version 24.0.5, build ced0996 +- Docker Compose version v2.20.2 +- Internet connection to download base images and other dependencies +- At least 1G of storage space for the images (~530M for Skosmos, + ~140M for Jena Fuseki, and ~260M for Varnish cache), or more + depending on your use of vocabularies and data + ## Running with Docker The following commands will build and tag the image it with `skosmos:test`, @@ -25,7 +38,7 @@ with the vocabulary data, and to be available at `http://localhost:3030`. For this last requisite you must create a [Docker network](https://docs.docker.com/network/network-tutorial-standalone/), use [`--net=host`](https://docs.docker.com/network/host/) or other mechanisms for -that. See the section [Running with docker-compose](#running-with-docker-compose) +that. See the section [Running with docker compose](#running-with-docker-compose) if you would like to use Docker Compose. To stop the container: @@ -44,21 +57,29 @@ two files as necessary. **NOTE**: If you would like to start a Fuseki container to test with Docker only, without Docker Compose, you can try the following command before loading your vocabulary data. It starts a container in the same way our other example with -the `docker-compose` command. +the `docker compose` command. 
+ + export JENA_4_VERSION=4.8.0 + + docker build -t jena-fuseki:$JENA_4_VERSION \ + --build-arg JENA_VERSION=$JENA_4_VERSION \ + --no-cache dockerfiles/jena-fuseki2-docker + + docker run --name fuseki --rm -ti \ + -v $(pwd)/config/skosmos.ttl:/fuseki/skosmos.ttl \ + -e "JAVA_OPTIONS=-Xmx2g -Xms1g" \ + -p 3030:3030 \ + jena-fuseki:$JENA_4_VERSION \ + --config=/fuseki/skosmos.ttl - docker run --name fuseki -ti --rm \ - --env "ADMIN_PASSWORD=admin" --env "JVM_ARGS=-Xmx2g" \ - -p 3030:3030 \ - --mount type=bind,src=$(pwd)/config/skosmos.ttl,dst=/fuseki/configuration/skosmos.ttl \ - stain/jena-fuseki + curl -XPOST http://localhost:3030/skosmos/query -d "query=SELECT ?a WHERE { ?a ?b ?c }" -## Running with docker-compose +## Running with docker compose -The `docker-compose` provided configuration will prepare three containers. -The first one called `skosmos-fuseki`, which uses the `stain/jena-fuseki` -image for Jena, and starts a container with 2 GB of memory and `admin` as -the user and password. The `docker-compose` service name of this container -is `fuseki`. +The `docker compose` provided configuration will prepare three containers. +The first one called `skosmos-fuseki`, which uses the Apache Jena +image for Fuseki, and starts a container with 2 GB of memory. The +`docker compose` service name of this container is `fuseki`. The second container is the `fuseki-cache`, a Varnish Cache container. It sits between the `skosmos-fuseki` and the `skosmos-web` (more on this below). The @@ -83,22 +104,22 @@ and `localhost:9031` respectively. To create the containers in this example setup, you can use this command from the `./dockerfiles/` directory: - docker-compose up -d + docker compose up -d Now Skosmos should be available at `http://localhost:9090/` from your host. See the [section below](#loading-vocabulary-data) to load vocabulary data. 
To stop: - docker-compose down + docker compose down ## Loading vocabulary data -After you have your container running, with either Docker or `docker-compose`, +After you have your container running, with either Docker or `docker compose`, you will need to load your vocabulary data. **NOTE**: In the example below, we use the Fuseki URL `localhost:3030`, which -should work for the Docker setup. If you used `docker-compose`, you will have +should work for the Docker setup. If you used `docker compose`, you will have to use `localhost:9030` instead. # load STW vocabulary data diff --git a/dockerfiles/config/skosmos.ttl b/dockerfiles/config/skosmos.ttl index 03cb37a34..5396dc887 100644 --- a/dockerfiles/config/skosmos.ttl +++ b/dockerfiles/config/skosmos.ttl @@ -1,85 +1,73 @@ @prefix : . @prefix rdf: . -@prefix tdb2: . -@prefix ja: . @prefix rdfs: . -@prefix fuseki: . +@prefix tdb2: . @prefix text: . @prefix skos: . +@prefix fuseki: . -ja:DatasetTxnMem rdfs:subClassOf ja:RDFDataset . -ja:MemoryDataset rdfs:subClassOf ja:RDFDataset . -ja:RDFDatasetOne rdfs:subClassOf ja:RDFDataset . -ja:RDFDatasetSink rdfs:subClassOf ja:RDFDataset . -ja:RDFDatasetZero rdfs:subClassOf ja:RDFDataset . - -tdb2:DatasetTDB rdfs:subClassOf ja:RDFDataset . -tdb2:DatasetTDB2 rdfs:subClassOf ja:RDFDataset . - -tdb2:GraphTDB rdfs:subClassOf ja:Model . -tdb2:GraphTDB2 rdfs:subClassOf ja:Model . - - - rdfs:subClassOf ja:RDFDataset . - - - rdfs:subClassOf ja:Model . - -text:TextDataset - rdfs:subClassOf ja:RDFDataset . +[] rdf:type fuseki:Server ; + fuseki:services ( + :skosmos_service + ) +. 
-:service_tdb_all a fuseki:Service ; - rdfs:label "TDB2+text skosmos" ; - fuseki:dataset :text_dataset ; - fuseki:name "skosmos" ; - fuseki:serviceQuery "query" , "" , "sparql" ; - fuseki:serviceReadGraphStore "get" ; - fuseki:serviceReadQuads "" ; +:skosmos_service rdf:type fuseki:Service ; + rdfs:label "TDB2+text skosmos" ; + fuseki:name "skosmos" ; + fuseki:serviceQuery "query" , "" , "sparql" ; + fuseki:serviceReadGraphStore "get" ; + fuseki:serviceReadQuads "" ; fuseki:serviceReadWriteGraphStore "data" ; - fuseki:serviceReadWriteQuads "" ; - fuseki:serviceUpdate "" , "update" ; - fuseki:serviceUpload "upload" . + fuseki:serviceReadWriteQuads "" ; + fuseki:serviceUpdate "" , "update" ; + fuseki:serviceUpload "upload" ; + fuseki:dataset :text_dataset +. -:text_dataset a text:TextDataset ; - text:dataset :tdb_dataset_readwrite ; - text:index :index_lucene . +:text_dataset rdf:type text:TextDataset ; + text:dataset :tdb_dataset_readwrite ; + text:index <#indexLucene> +. -:tdb_dataset_readwrite - a tdb2:DatasetTDB2 ; - # tdb2:unionDefaultGraph true ; - tdb2:location "/fuseki/databases/skosmos" . +:tdb_dataset_readwrite rdf:type tdb2:DatasetTDB2 ; + tdb2:location "/fuseki/databases/skosmos" ; + # tdb2:unionDefaultGraph true ; +. -:index_lucene a text:TextIndexLucene ; - text:directory ; - text:entityMap :entity_map ; - text:storeValues true . +<#indexLucene> rdf:type text:TextIndexLucene ; + text:directory ; + text:entityMap <#entMap> ; + text:storeValues true +. 
# Text index configuration for Skosmos -:entity_map a text:EntityMap ; - text:entityField "uri" ; - text:graphField "graph" ; +<#entMap> rdf:type text:EntityMap ; text:defaultField "pref" ; + text:entityField "uri" ; text:uidField "uid" ; text:langField "lang" ; + text:graphField "graph" ; text:map ( # skos:prefLabel [ text:field "pref" ; text:predicate skos:prefLabel ; - text:analyzer [ a text:LowerCaseKeywordAnalyzer ] + text:analyzer [ rdf:type text:LowerCaseKeywordAnalyzer ] ] # skos:altLabel [ text:field "alt" ; text:predicate skos:altLabel ; - text:analyzer [ a text:LowerCaseKeywordAnalyzer ] + text:analyzer [ rdf:type text:LowerCaseKeywordAnalyzer ] ] # skos:hiddenLabel [ text:field "hidden" ; text:predicate skos:hiddenLabel ; - text:analyzer [ a text:LowerCaseKeywordAnalyzer ] + text:analyzer [ rdf:type text:LowerCaseKeywordAnalyzer ] ] # skos:notation [ text:field "notation" ; text:predicate skos:notation ; - text:analyzer [ a text:LowerCaseKeywordAnalyzer ] + text:analyzer [ rdf:type text:LowerCaseKeywordAnalyzer ] ] - ) . + ) +. 
diff --git a/dockerfiles/docker-compose.yml b/dockerfiles/docker-compose.yml index f35a0e2c4..821b58e31 100644 --- a/dockerfiles/docker-compose.yml +++ b/dockerfiles/docker-compose.yml @@ -4,26 +4,31 @@ services: fuseki: container_name: skosmos-fuseki hostname: fuseki - image: stain/jena-fuseki + build: + context: ../dockerfiles/jena-fuseki2-docker + dockerfile: Dockerfile + args: + JENA_VERSION: 4.8.0 + command: --config=/fuseki/skosmos.ttl environment: - - ADMIN_PASSWORD=admin - - JVM_ARGS=-Xmx2g + - JAVA_OPTIONS=-Xmx2g -Xms1g ports: - - 9030:3030 - # You can uncomment the line below to have a local volume bound onto the container, or - # visit https://hub.docker.com/r/stain/jena-fuseki/ for other alternatives - # volumes: - # - ${PWD}/fuseki:/fuseki + - '9030:3030' volumes: - - type: bind - source: ./config/skosmos.ttl - target: /fuseki/configuration/skosmos.ttl + # You can uncomment the lines below to persist data used in the + # container. For more complete documentation about it, please + # consult the official Apache Jena docs at + # https://github.com/apache/jena/tree/main/jena-fuseki2/jena-fuseki-docker + # - ./databases:/fuseki/databases + # - ./logs:/fuseki/logs + - ./config/skosmos.ttl:/fuseki/skosmos.ttl + user: 'fuseki:fuseki' fuseki-cache: container_name: skosmos-fuseki-cache hostname: fuseki-cache image: varnish ports: - - 9031:80 + - '9031:80' volumes: - type: bind source: ./config/varnish-default.vcl @@ -35,7 +40,7 @@ services: context: .. dockerfile: dockerfiles/Dockerfile.ubuntu ports: - - 9090:80 + - '9090:80' depends_on: - fuseki - fuseki-cache diff --git a/dockerfiles/jena-fuseki2-docker/Dockerfile b/dockerfiles/jena-fuseki2-docker/Dockerfile new file mode 100644 index 000000000..f947fc459 --- /dev/null +++ b/dockerfiles/jena-fuseki2-docker/Dockerfile @@ -0,0 +1,124 @@ +## Licensed to the Apache Software Foundation (ASF) under one or more +## contributor license agreements. 
See the NOTICE file distributed with +## this work for additional information regarding copyright ownership. +## The ASF licenses this file to You under the Apache License, Version 2.0 +## (the "License"); you may not use this file except in compliance with +## the License. You may obtain a copy of the License at +## +## http://www.apache.org/licenses/LICENSE-2.0 +## +## Unless required by applicable law or agreed to in writing, software +## distributed under the License is distributed on an "AS IS" BASIS, +## WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +## See the License for the specific language governing permissions and +## limitations under the License. + +## Apache Jena Fuseki server Dockerfile. + +## This Dockerfile builds a reduced footprint container. + +ARG JAVA_VERSION=17 + +ARG ALPINE_VERSION=3.17.1 +ARG JENA_VERSION="" + +# Internal, passed between stages. +ARG FUSEKI_DIR=/fuseki +ARG FUSEKI_JAR=jena-fuseki-server-${JENA_VERSION}.jar +ARG JAVA_MINIMAL=/opt/java-minimal + +## ---- Stage: Download and build java. +FROM eclipse-temurin:${JAVA_VERSION}-alpine AS base + +ARG JAVA_MINIMAL +ARG JENA_VERSION +ARG FUSEKI_DIR +ARG FUSEKI_JAR +ARG REPO=https://repo1.maven.org/maven2 +ARG JAR_URL=${REPO}/org/apache/jena/jena-fuseki-server/${JENA_VERSION}/${FUSEKI_JAR} + +RUN [ "${JENA_VERSION}" != "" ] || { echo -e '\n**** Set JENA_VERSION ****\n' ; exit 1 ; } +RUN echo && echo "==== Docker build for Apache Jena Fuseki ${JENA_VERSION} ====" && echo + +# Alpine: For objcopy used in jlink +RUN apk add --no-cache curl binutils + +## -- Fuseki installed and runs in /fuseki. +WORKDIR $FUSEKI_DIR + +## -- Download the jar file. +COPY download.sh . +RUN chmod a+x download.sh + +# Download, with check of the SHA1 checksum. +RUN ./download.sh --chksum sha1 "$JAR_URL" + +## -- Alternatives to download : copy already downloaded. +## COPY ${FUSEKI_JAR} . + +## Use Docker ADD - does not retry, does not check checksum, and may run every build.
+## ADD "$JAR_URL" + +## -- Make reduced Java JDK + +ARG JDEPS_EXTRA="jdk.crypto.cryptoki,jdk.crypto.ec" +RUN \ + JDEPS="$(jdeps --multi-release base --print-module-deps --ignore-missing-deps ${FUSEKI_JAR})" && \ + jlink \ + --compress 2 --strip-debug --no-header-files --no-man-pages \ + --output "${JAVA_MINIMAL}" \ + --add-modules "${JDEPS},${JDEPS_EXTRA}" + +ADD entrypoint.sh . +ADD log4j2.properties . + +## ---- Stage: Build runtime +FROM alpine:${ALPINE_VERSION} + +## Import ARGs +ARG JENA_VERSION +ARG JAVA_MINIMAL +ARG FUSEKI_DIR +ARG FUSEKI_JAR + +COPY --from=base /opt/java-minimal /opt/java-minimal +COPY --from=base /fuseki /fuseki + +WORKDIR $FUSEKI_DIR + +ARG LOGS=${FUSEKI_DIR}/logs +ARG DATA=${FUSEKI_DIR}/databases + +ARG JENA_USER=fuseki +ARG JENA_GROUP=$JENA_USER +ARG JENA_GID=1000 +ARG JENA_UID=1000 + +# Run as this user +# -H : no home directory +# -D : no password +RUN addgroup -g "${JENA_GID}" "${JENA_GROUP}" && \ + adduser "${JENA_USER}" -G "${JENA_GROUP}" -s /bin/ash -u "${JENA_UID}" -H -D + +RUN mkdir --parents "${FUSEKI_DIR}" && \ + chown -R $JENA_USER ${FUSEKI_DIR} + +USER $JENA_USER + +RUN \ + mkdir -p $LOGS && \ + mkdir -p $DATA && \ + chmod a+x entrypoint.sh + +## Default environment variables. 
+ENV \ + JAVA_HOME=${JAVA_MINIMAL} \ + JAVA_OPTIONS="-Xmx2048m -Xms2048m" \ + JENA_VERSION=${JENA_VERSION} \ + FUSEKI_JAR="${FUSEKI_JAR}" \ + FUSEKI_DIR="${FUSEKI_DIR}" + +EXPOSE 3030 + +ENTRYPOINT ["./entrypoint.sh" ] +CMD [] diff --git a/dockerfiles/jena-fuseki2-docker/README.md b/dockerfiles/jena-fuseki2-docker/README.md new file mode 100644 index 000000000..90b643917 --- /dev/null +++ b/dockerfiles/jena-fuseki2-docker/README.md @@ -0,0 +1 @@ +Commit: 26153afe9a229bb7a609d5406d899eb240ab385e diff --git a/dockerfiles/jena-fuseki2-docker/download.sh b/dockerfiles/jena-fuseki2-docker/download.sh new file mode 100755 index 000000000..211eca784 --- /dev/null +++ b/dockerfiles/jena-fuseki2-docker/download.sh @@ -0,0 +1,147 @@ +#!/bin/sh + +## Licensed under the terms of http://www.apache.org/licenses/LICENSE-2.0 +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# This is an ash/dash script (it uses "local"), not a bash script. +# It can run in an Alpine image during a docker build. +# +# The advantage over using docker ADD is that it checks +# whether download file is already present and does not +# download each time. 
+# +# Shell script to download URL and check the checksum + +USAGE="Usage: $(basename "$0") --chksum [sha1|sha512] URL" + +if [ $# -eq 0 ] +then + echo "$USAGE" 1>&2 + exit 1 +fi + +CHKSUM_TYPE='unset' + +while [ $# -gt 0 ] ; do + case "$1" in + --chksum|-chksum|-sha|--sha) + if [ $# -lt 2 ] + then + echo "$USAGE" 1>&2 + exit 1 + fi + CHKSUM_TYPE=$2 + shift + shift + ;; + -h|--help) + echo "$USAGE" 1>&2 + exit 0 + ;; + -*) + echo "$USAGE" 1>&2 + exit 1 + ;; + *) + if [ $# -ne 1 ] + then + echo "$USAGE" 1>&2 + exit 1 + fi + URL="$1" + shift + ;; + esac +done + +case "${CHKSUM_TYPE}" in + unset) + echo "$USAGE" 1>&2 + exit 1 + ;; + sha*|md5) ;; + *) + echo "Bad checksum type: '$CHKSUM_TYPE' (must start 'sha' or be 'md5')" 1>&2 + exit 1 + ;; +esac + +## ---- Script starts ---- + +ARTIFACT_URL="${URL}" +ARTIFACT_NAME="$(basename "$ARTIFACT_URL")" + +# -------- Checksum details + +CHKSUM_EXT=".${CHKSUM_TYPE}" +CHKSUM_URL="${ARTIFACT_URL}${CHKSUM_EXT}" +CHKSUM_FILE="${ARTIFACT_NAME}${CHKSUM_EXT}" +CHKSUMPROG="${CHKSUM_TYPE}sum" +# -------- + +CURL_FETCH_OPTS="-s -S --fail --location --max-redirs 3" +if false +then + echo "ARTIFACT_URL=$ARTIFACT_URL" + echo "CHKSUM_URL=$CHKSUM_URL" +fi + +download() { # URL + local URL="$1" + local FN="$(basename "$URL")" + if [ ! -e "$FN" ] + then + echo "Fetching $URL" + curl $CURL_FETCH_OPTS "$URL" --output "$FN" \ + || { echo "Bad download of $FN" 1>&2 ; return 1 ; } + else + echo "$FN already present" + fi + return 0 +} + +checkChksum() { # Filename checksum + local FN="$1" + local CHKSUM="$2" + if [ ! -e "$FN" ] + then + echo "No such file: '$FN'" 1>&2 + exit 1 + fi + # NB Two spaces required for busybox + echo "$CHKSUM  $FN" | ${CHKSUMPROG} -c > /dev/null +} + +download "$ARTIFACT_URL" || exit 1 + +if [ -z "$CHKSUM" ] +then + # Checksum not previously set.
+ # Extract from file, copes with variations in content (filename or not) + download "$CHKSUM_URL" || exit 1 + CHKSUM="$(cut -d' ' -f1 "$CHKSUM_FILE")" +fi + +checkChksum "${ARTIFACT_NAME}" "$CHKSUM" +if [ $? = 0 ] +then + echo "Good download: $ARTIFACT_NAME" +else + echo "BAD download !!!! $ARTIFACT_NAME" + echo "To retry: delete downloaded files and try again" + exit 1 +fi diff --git a/dockerfiles/jena-fuseki2-docker/entrypoint.sh b/dockerfiles/jena-fuseki2-docker/entrypoint.sh new file mode 100755 index 000000000..a4ab8cdc6 --- /dev/null +++ b/dockerfiles/jena-fuseki2-docker/entrypoint.sh @@ -0,0 +1,5 @@ +#!/bin/sh +## Licensed under the terms of http://www.apache.org/licenses/LICENSE-2.0 + +## env | sort +exec "$JAVA_HOME/bin/java" $JAVA_OPTIONS -jar "${FUSEKI_DIR}/${FUSEKI_JAR}" "$@" diff --git a/dockerfiles/jena-fuseki2-docker/log4j2.properties b/dockerfiles/jena-fuseki2-docker/log4j2.properties new file mode 100644 index 000000000..07f360da5 --- /dev/null +++ b/dockerfiles/jena-fuseki2-docker/log4j2.properties @@ -0,0 +1,73 @@ +## Licensed under the terms of http://www.apache.org/licenses/LICENSE-2.0 +status = error +name = PropertiesConfig + +## filters = threshold +## filter.threshold.type = ThresholdFilter +## filter.threshold.level = ALL + +appender.console.type = Console +appender.console.name = OUT +appender.console.target = SYSTEM_OUT +appender.console.layout.type = PatternLayout +## appender.console.layout.pattern = %d{HH:mm:ss} %-5p %-15c{1} :: %m%n +## Include date. +appender.console.layout.pattern = [%d{yyyy-MM-dd HH:mm:ss}] %-5p %-15c{1} :: %m%n + +## To a file. 
+## appender.file.type = File +## appender.file.name = FILE +## appender.file.fileName=/fuseki/logs/log.fuseki +## appender.file.layout.type=PatternLayout +## appender.file.layout.pattern = [%d{yyyy-MM-dd HH:mm:ss}] %-5p %-15c{1} :: %m%n + +rootLogger.level = INFO +rootLogger.appenderRef.stdout.ref = OUT + +logger.jena.name = org.apache.jena +logger.jena.level = INFO + +logger.arq-exec.name = org.apache.jena.arq.exec +logger.arq-exec.level = INFO + +logger.arq-info.name = org.apache.jena.arq.info +logger.arq-info.level = INFO + +logger.riot.name = org.apache.jena.riot +logger.riot.level = INFO + +logger.fuseki.name = org.apache.jena.fuseki +logger.fuseki.level = INFO + +logger.fuseki-fuseki.name = org.apache.jena.fuseki.Fuseki +logger.fuseki-fuseki.level = INFO + +logger.fuseki-server.name = org.apache.jena.fuseki.Server +logger.fuseki-server.level = INFO + +logger.fuseki-admin.name = org.apache.jena.fuseki.Admin +logger.fuseki-admin.level = INFO + +logger.jetty.name = org.eclipse.jetty +logger.jetty.level = WARN + +# May be useful to turn up to DEBUG if debugging HTTP communication issues +logger.apache-http.name = org.apache.http +logger.apache-http.level = WARN + +logger.shiro.name = org.apache.shiro +logger.shiro.level = WARN +# Hide bug in Shiro 1.5.x +logger.shiro-realm.name = org.apache.shiro.realm.text.IniRealm +logger.shiro-realm.level = ERROR + +# This goes out in NCSA format +appender.plain.type = Console +appender.plain.name = PLAIN +appender.plain.layout.type = PatternLayout +appender.plain.layout.pattern = %m%n + +logger.request-log.name = org.apache.jena.fuseki.Request +logger.request-log.additivity = false +logger.request-log.level = OFF +logger.request-log.appenderRef.plain.ref = PLAIN