Skip to content

Commit

Permalink
Initial version for testing
Browse files Browse the repository at this point in the history
  • Loading branch information
dameikle committed Jan 9, 2020
0 parents commit 18bb384
Show file tree
Hide file tree
Showing 6 changed files with 202 additions and 0 deletions.
27 changes: 27 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
.svn
target
dependency-reduced-pom.xml
.idea
.classpath
.project
.settings
*.iml
*.ipr
*.iws
*.bin
nbactions.xml
nb-configuration.xml
*.DS_Store
*.tmp-inception
*.snap
.*.swp
tika-deployment/tika-snap-app/parts/
tika-deployment/tika-snap-app/prime/
tika-deployment/tika-snap-app/snap/
tika-deployment/tika-snap-app/stage/
tika-deployment/tika-snap-app/test/
tika-deployment/tika-snap-server/parts/
tika-deployment/tika-snap-server/prime/
tika-deployment/tika-snap-server/snap/
tika-deployment/tika-snap-server/stage/

9 changes: 9 additions & 0 deletions .travis.ci.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
language: bash
services: docker
env:
matrix:
- VERSION=1.23
- VERSION=1.22
script:
- docker-tool.sh build $VERSION
- docker-tool.sh test $VERSION
3 changes: 3 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# tika-docker

Work in Progress
68 changes: 68 additions & 0 deletions docker-tool.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
#!/usr/bin/env bash

# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

while getopts ":h" opt; do
case ${opt} in
h )
echo "Usage:"
echo " docker-tool.sh -h Display this help message."
echo " docker-tool.sh build <TIKA_VERSION> Builds image(s) for <TIKA_VERSION>."
echo " docker-tool.sh test <TIKA_VERSION> Tests image(s) for <TIKA_VERSION>."
echo " docker-tool.sh publish <TIKA_VERSION> Publishes image(s) for <TIKA_VERSION> to Docker Hub."
exit 0
;;
\? )
echo "Invalid Option: -$OPTARG" 1>&2
exit 1
;;
esac
done

test_docker_image() {
docker run -d --name "$1" -p 9998:9998 apache/tika:"$1"
sleep 10
curl http://localhost:9998/version
docker kill "$1"
docker rm "$1"
}

shift $((OPTIND -1))
subcommand=$1; shift
version=$1; shift

case "$subcommand" in
build)
# Build slim version with minimal dependencies
docker build -t apache/tika:${version} --build-arg TIKA_VERSION=${version} - < minimal/Dockerfile
# Build full version with OCR, Fonts and GDAL
docker build -t apache/tika:${version}-full --build-arg TIKA_VERSION=${version} - < full/Dockerfile
;;

test)
# Test minimal image
test_docker_image ${version}
# Test full image
test_docker_image "${version}-full"
;;

publish)
echo "Does nothing until we get Docker Hub access setup under Apache Organisation"
;;

esac
50 changes: 50 additions & 0 deletions full/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
# Licensed under the Apache License, Version 2.0 (the "License"); you may not
# use this file except in compliance with the License. You may obtain a copy of
# the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations under
# the License.
FROM ubuntu:bionic as base
RUN apt-get update

FROM base as dependencies

RUN DEBIAN_FRONTEND=noninteractive apt-get -y install openjdk-11-jre-headless gdal-bin tesseract-ocr \
tesseract-ocr-eng tesseract-ocr-ita tesseract-ocr-fra tesseract-ocr-spa tesseract-ocr-deu

RUN echo ttf-mscorefonts-installer msttcorefonts/accepted-mscorefonts-eula select true | debconf-set-selections \
&& DEBIAN_FRONTEND=noninteractive apt-get install -y curl xfonts-utils fonts-freefont-ttf fonts-liberation ttf-mscorefonts-installer wget cabextract

FROM dependencies as fetch_tika
ARG TIKA_VERSION

ENV NEAREST_TIKA_SERVER_URL="https://www.apache.org/dyn/closer.cgi/tika/tika-server-${TIKA_VERSION}.jar?filename=tika/tika-server-${TIKA_VERSION}.jar&action=download" \
ARCHIVE_TIKA_SERVER_URL="https://archive.apache.org/dist/tika/tika-server-${TIKA_VERSION}.jar" \
DEFAULT_TIKA_SERVER_ASC_URL="https://www.apache.org/dist/tika/tika-server-${TIKA_VERSION}.jar.asc" \
ARCHIVE_TIKA_SERVER_ASC_URL="https://archive.apache.org/dist/tika/tika-server-${TIKA_VERSION}.jar.asc" \
TIKA_VERSION=$TIKA_VERSION

RUN DEBIAN_FRONTEND=noninteractive apt-get -y install gnupg2 curl wget \
&& curl -sSL https://www.apache.org/dist/tika/KEYS | gpg --import \
&& echo "Nearest mirror: $NEAREST_TIKA_SERVER_URL" \
&& wget $NEAREST_TIKA_SERVER_URL -O /tika-server-${TIKA_VERSION}.jar || rm /tika-server-${TIKA_VERSION}.jar \
&& sh -c "[ -f /tika-server-${TIKA_VERSION}.jar ]" || wget $ARCHIVE_TIKA_SERVER_URL -O /tika-server-${TIKA_VERSION}.jar || rm /tika-server-${TIKA_VERSION}.jar \
&& wget $DEFAULT_TIKA_SERVER_ASC_URL -O /tika-server-${TIKA_VERSION}.jar.asc || rm /tika-server-${TIKA_VERSION}.jar.asc \
&& sh -c "[ -f /tika-server-${TIKA_VERSION}.jar.asc ]" || wget $ARCHIVE_TIKA_SERVER_ASC_URL -O /tika-server-${TIKA_VERSION}.jar.asc || rm /tika-server-${TIKA_VERSION}.jar.asc \
&& gpg --verify /tika-server-${TIKA_VERSION}.jar.asc /tika-server-${TIKA_VERSION}.jar

FROM dependencies as runtime
RUN apt-get clean -y && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
ARG TIKA_VERSION
ENV TIKA_VERSION=$TIKA_VERSION
COPY --from=fetch_tika /tika-server-${TIKA_VERSION}.jar /tika-server-${TIKA_VERSION}.jar

EXPOSE 9998
ENTRYPOINT java -jar /tika-server-${TIKA_VERSION}.jar -h 0.0.0.0

LABEL maintainer="Apache Tika Developers [email protected]"
45 changes: 45 additions & 0 deletions minimal/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
# Licensed under the Apache License, Version 2.0 (the "License"); you may not
# use this file except in compliance with the License. You may obtain a copy of
# the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations under
# the License.
FROM ubuntu:bionic as base
RUN apt-get update

FROM base as dependencies
RUN DEBIAN_FRONTEND=noninteractive apt-get -y install openjdk-11-jre-headless

FROM dependencies as fetch_tika
ARG TIKA_VERSION

ENV NEAREST_TIKA_SERVER_URL="https://www.apache.org/dyn/closer.cgi/tika/tika-server-${TIKA_VERSION}.jar?filename=tika/tika-server-${TIKA_VERSION}.jar&action=download" \
ARCHIVE_TIKA_SERVER_URL="https://archive.apache.org/dist/tika/tika-server-${TIKA_VERSION}.jar" \
DEFAULT_TIKA_SERVER_ASC_URL="https://www.apache.org/dist/tika/tika-server-${TIKA_VERSION}.jar.asc" \
ARCHIVE_TIKA_SERVER_ASC_URL="https://archive.apache.org/dist/tika/tika-server-${TIKA_VERSION}.jar.asc" \
TIKA_VERSION=$TIKA_VERSION

RUN DEBIAN_FRONTEND=noninteractive apt-get -y install gnupg2 curl wget \
&& curl -sSL https://www.apache.org/dist/tika/KEYS | gpg --import \
&& echo "Nearest mirror: $NEAREST_TIKA_SERVER_URL" \
&& wget $NEAREST_TIKA_SERVER_URL -O /tika-server-${TIKA_VERSION}.jar || rm /tika-server-${TIKA_VERSION}.jar \
&& sh -c "[ -f /tika-server-${TIKA_VERSION}.jar ]" || wget $ARCHIVE_TIKA_SERVER_URL -O /tika-server-${TIKA_VERSION}.jar || rm /tika-server-${TIKA_VERSION}.jar \
&& wget $DEFAULT_TIKA_SERVER_ASC_URL -O /tika-server-${TIKA_VERSION}.jar.asc || rm /tika-server-${TIKA_VERSION}.jar.asc \
&& sh -c "[ -f /tika-server-${TIKA_VERSION}.jar.asc ]" || wget $ARCHIVE_TIKA_SERVER_ASC_URL -O /tika-server-${TIKA_VERSION}.jar.asc || rm /tika-server-${TIKA_VERSION}.jar.asc \
&& gpg --verify /tika-server-${TIKA_VERSION}.jar.asc /tika-server-${TIKA_VERSION}.jar

FROM dependencies as runtime
RUN apt-get clean -y && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
ARG TIKA_VERSION
ENV TIKA_VERSION=$TIKA_VERSION
COPY --from=fetch_tika /tika-server-${TIKA_VERSION}.jar /tika-server-${TIKA_VERSION}.jar

EXPOSE 9998
ENTRYPOINT java -jar /tika-server-${TIKA_VERSION}.jar -h 0.0.0.0

LABEL maintainer="Apache Tika Developers [email protected]"

0 comments on commit 18bb384

Please sign in to comment.