From 58c13f54edb9707dbc4f035eeb7fe1894832e578 Mon Sep 17 00:00:00 2001 From: Ali Khan Date: Tue, 24 Sep 2024 13:24:17 -0400 Subject: [PATCH 1/6] changes to get bigstitcher spark working --- Dockerfile | 8 ++- spark_custom_install | 148 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 154 insertions(+), 2 deletions(-) create mode 100755 spark_custom_install diff --git a/Dockerfile b/Dockerfile index 55613b9..63c58b1 100644 --- a/Dockerfile +++ b/Dockerfile @@ -36,7 +36,7 @@ ENV JAVA_HOME=/usr/lib/jvm/java-1.8.0-amazon-corretto #install maven hdf5-tools deps, 7zip (for zarr zipstore archiving) -RUN apt-get update && apt-get install -y maven hdf5-tools libblosc-dev p7zip-full && mkdir -p /opt/bin +RUN apt-get update && apt-get install -y maven hdf5-tools libblosc-dev p7zip-full libxtst-dev && mkdir -p /opt/bin ENV PATH $PATH:/opt/bin @@ -44,7 +44,11 @@ ENV PATH $PATH:/opt/bin RUN cd /opt && git clone https://github.com/saalfeldlab/n5-utils && cd n5-utils && ./install /opt/bin #install bigstitcher-spark, and customize launcher to include args for mem and cpu -RUN cd /opt && git clone https://github.com/akhanf/BigStitcher-Spark.git && cd BigStitcher-Spark && ./install -t 32 -m 128 && cp -v affine-fusion /opt/bin && cp -v target/BigStitcher-Spark-0.0.2-SNAPSHOT.jar /opt/bin +RUN cd /opt && git clone https://github.com/akhanf/BigStitcher-Spark.git +COPY spark_custom_install /opt/BigStitcher-Spark/install +RUN cd BigStitcher-Spark && ./install -t 32 -m 128 + +ENV PATH $PATH:/opt/BigStitcher-Spark # Install Fiji. RUN mkdir /opt/fiji \ diff --git a/spark_custom_install b/spark_custom_install new file mode 100755 index 0000000..b5385f9 --- /dev/null +++ b/spark_custom_install @@ -0,0 +1,148 @@ +#!/bin/bash + +# This script is shamelessly extended from https://github.com/saalfeldlab/n5-utils, thanks @axtimwalde & co! + +USERTHREADS="-1" +USERMEM="-1" + +while [[ $# -gt 0 ]]; do + case $1 in + -t|--threads) + USERTHREADS="$2" + shift # past argument + shift # past value + ;; + -m|--mem) + USERMEM="$2" + shift # past argument + shift # past value + ;; + -*|--*) + echo "Unknown option $1" + exit 1 + ;; + esac +done + +if [ $USERTHREADS == "-1" ]; then + echo "You did not define the number of threads for Java/Spark can use, will be set automatically. You could do it by e.g.: './install -t 8' for 8 threads." +fi + +if [ $USERMEM == "-1" ]; then + echo "You did not define the memory that Java/Spark can use, will be set automatically. You could do it by e.g.: './install -m 64' for 64GB of RAM." +fi + + +if [ $USERMEM == "-1" ]; then + # check for operating system + if [[ "$OSTYPE" == "linux-gnu" ]]; then + echo "Assuming on Linux operating system" + MEM=$(cat /proc/meminfo | grep MemTotal | sed s/^MemTotal:\\\s*\\\|\\\s\\+[^\\\s]*$//g) + MEMGB=$(($MEM/1024/1024)) + MEM=$((($MEMGB/5)*4)) + elif [[ "$OSTYPE" == "darwin"* ]]; then + echo "Assuming on MacOS X operating system" + # sysctl returns total hardware memory size in bytes + MEM=$(sysctl hw.memsize | grep hw.memsize | sed s/hw.memsize://g) + MEMGB=$(($MEM/1024/1024/1024)) + MEM=$((($MEMGB/5)*4)) + else + echo "ERROR - Operating system (arg2) must be either linux or osx to determine max memory. Please specify max memory by e.g.: './install -m 64' for 64GB of RAM" + exit + fi + + echo "Available memory:" $MEMGB "GB, setting Java memory limit to" $MEM "GB" +else + MEM=$USERMEM + echo "Setting Java memory limit to" $MEM "GB" +fi + +if [ $USERTHREADS == "-1" ]; then + # check for operating system + if [[ "$OSTYPE" == "linux-gnu" ]]; then + if [ $USERMEM != "-1" ]; then + echo "Assuming on Linux operating system" + fi + THREADS=$(nproc --all) + elif [[ "$OSTYPE" == "darwin"* ]]; then + if [ $USERMEM != "-1" ]; then + echo "Assuming on MacOS X operating system" + fi + # sysctl returns number of physical cores + THREADS=$(sysctl -n hw.ncpu) + else + echo "ERROR - Operating system (arg2) must be either linux or osx to determine number of physical cores. Please specify physical by e.g.: './install -t 8' to use 8 threads." + exit + fi + echo "Available threads:" $THREADS", setting Java/Sparks threads accordingly." +else + THREADS=$USERTHREADS + echo "Setting Java/Spark number of threads to" $THREADS +fi + + +VERSION="0.0.2-SNAPSHOT" +INSTALL_DIR=$(pwd) +#INSTALL_DIR=${1:-$(pwd)} + +echo "" +echo "Installing into $INSTALL_DIR (for local execution)" + +echo 'Building the code' + +sleep 2 + +#mvn clean install +#echo 'Building a farjar, which can also be used for cluster/cloud execution' +mvn clean install -P fatjar -gs /opt/BigStitcher-Spark/settings.xml +mvn -Dmdep.outputFile=cp.txt -Dmdep.includeScope=runtime dependency:build-classpath + +# function that installs one command +# $1 - command name +# $2 - java class containing the functionality +install_command () { + echo "Installing '$1' command into" $INSTALL_DIR + + echo '#!/bin/bash' > $1 + echo '' >> $1 + echo "JAR=/opt/BigStitcher-Spark/repo/net/preibisch/BigStitcher-Spark/${VERSION}/BigStitcher-Spark-${VERSION}.jar" >> $1 + echo 'java \' >> $1 + echo " -Xmx${MEM}g -Dspark.master=local[${THREADS}] \\" >> $1 +# echo ' -XX:+UseConcMarkSweepGC \' >> $1 + echo -n ' -cp $JAR:' >> $1 + echo -n $(cat cp.txt) >> $1 + echo ' \' >> $1 + echo ' '$2' "$@"' >> $1 + + chmod a+x $1 +} + +echo 'Installing workflow tools ...' + +install_command resave "net.preibisch.bigstitcher.spark.SparkResaveN5" +install_command detect-interestpoints "net.preibisch.bigstitcher.spark.SparkInterestPointDetection" +install_command match-interestpoints "net.preibisch.bigstitcher.spark.SparkGeometricDescriptorMatching" +install_command stitching "net.preibisch.bigstitcher.spark.SparkPairwiseStitching" +install_command solver "net.preibisch.bigstitcher.spark.Solver" +install_command affine-fusion "net.preibisch.bigstitcher.spark.SparkAffineFusion" +install_command nonrigid-fusion "net.preibisch.bigstitcher.spark.SparkNonRigidFusion" + +echo 'Installing utils ...' + +install_command downsample "net.preibisch.bigstitcher.spark.SparkDownsample" +install_command clear-interestpoints "net.preibisch.bigstitcher.spark.ClearInterestPoints" +install_command clear-registrations "net.preibisch.bigstitcher.spark.ClearRegistrations" +install_command transform-points "net.preibisch.bigstitcher.spark.TransformPoints" + + +if [ $(pwd) == "$INSTALL_DIR" ]; then + echo "Installation directory equals current directory, we are done." +else + echo "Creating directory $INSTALL_DIR and moving files..." + mkdir -p $INSTALL_DIR + mv affine-fusion $INSTALL_DIR/ +fi + +rm cp.txt + +echo "Installation finished." From ac260a46825f50845ea6ea5330f1bb8034e94562 Mon Sep 17 00:00:00 2001 From: Ali Khan Date: Tue, 24 Sep 2024 16:49:22 -0400 Subject: [PATCH 2/6] fix for mem and threads --- spark_custom_install | 35 +++++++++++++++++++---------------- 1 file changed, 19 insertions(+), 16 deletions(-) diff --git a/spark_custom_install b/spark_custom_install index b5385f9..4c025c3 100755 --- a/spark_custom_install +++ b/spark_custom_install @@ -94,27 +94,30 @@ sleep 2 #mvn clean install #echo 'Building a farjar, which can also be used for cluster/cloud execution' -mvn clean install -P fatjar -gs /opt/BigStitcher-Spark/settings.xml -mvn -Dmdep.outputFile=cp.txt -Dmdep.includeScope=runtime dependency:build-classpath +mvn clean install -P fatjar -Dmaven.repo.local=/opt/BigStitcher-Spark/repo +mvn -Dmdep.outputFile=cp.txt -Dmdep.includeScope=runtime dependency:build-classpath -Dmaven.repo.local=/opt/BigStitcher-Spark/repo # function that installs one command # $1 - command name # $2 - java class containing the functionality install_command () { - echo "Installing '$1' command into" $INSTALL_DIR - - echo '#!/bin/bash' > $1 - echo '' >> $1 - echo "JAR=/opt/BigStitcher-Spark/repo/net/preibisch/BigStitcher-Spark/${VERSION}/BigStitcher-Spark-${VERSION}.jar" >> $1 - echo 'java \' >> $1 - echo " -Xmx${MEM}g -Dspark.master=local[${THREADS}] \\" >> $1 -# echo ' -XX:+UseConcMarkSweepGC \' >> $1 - echo -n ' -cp $JAR:' >> $1 - echo -n $(cat cp.txt) >> $1 - echo ' \' >> $1 - echo ' '$2' "$@"' >> $1 - - chmod a+x $1 + echo "Installing '$1' command into" $INSTALL_DIR + + echo '#!/bin/bash' > $1 + echo '' >> $1 + echo 'MEM=$1' >> $1 + echo 'THREADS=$2' >> $1 + echo 'shift 2' >> $1 + echo "JAR=/opt/BigStitcher-Spark/repo/net/preibisch/BigStitcher-Spark/${VERSION}/BigStitcher-Spark-${VERSION}.jar" >> $1 + echo 'java \' >> $1 + echo " -Xmx${MEM}g -Dspark.master=local[${THREADS}] \\" >> $1 + # echo ' -XX:+UseConcMarkSweepGC \' >> $1 + echo -n ' -cp $JAR:' >> $1 + echo -n $(cat cp.txt) >> $1 + echo ' \' >> $1 + echo ' '$2' "$@"' >> $1 + + chmod a+x $1 } echo 'Installing workflow tools ...' From 982bc38332cc20be427534be34c726dccd0045bb Mon Sep 17 00:00:00 2001 From: Ali Khan Date: Tue, 24 Sep 2024 17:06:48 -0400 Subject: [PATCH 3/6] Update Dockerfile --- Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index 63c58b1..07dab5a 100644 --- a/Dockerfile +++ b/Dockerfile @@ -46,7 +46,7 @@ RUN cd /opt && git clone https://github.com/saalfeldlab/n5-utils && cd n5-utils #install bigstitcher-spark, and customize launcher to include args for mem and cpu RUN cd /opt && git clone https://github.com/akhanf/BigStitcher-Spark.git COPY spark_custom_install /opt/BigStitcher-Spark/install -RUN cd BigStitcher-Spark && ./install -t 32 -m 128 +RUN cd /opt/BigStitcher-Spark && ./install -t 32 -m 128 ENV PATH $PATH:/opt/BigStitcher-Spark From 6b83028361d96e8c308b47364a4833be6a3f0884 Mon Sep 17 00:00:00 2001 From: Ali Khan Date: Tue, 24 Sep 2024 20:26:05 -0400 Subject: [PATCH 4/6] fix spark script --- spark_custom_install | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/spark_custom_install b/spark_custom_install index 4c025c3..57771eb 100755 --- a/spark_custom_install +++ b/spark_custom_install @@ -105,12 +105,12 @@ install_command () { echo '#!/bin/bash' > $1 echo '' >> $1 - echo 'MEM=$1' >> $1 + echo 'MEM_GB=$1' >> $1 echo 'THREADS=$2' >> $1 echo 'shift 2' >> $1 echo "JAR=/opt/BigStitcher-Spark/repo/net/preibisch/BigStitcher-Spark/${VERSION}/BigStitcher-Spark-${VERSION}.jar" >> $1 echo 'java \' >> $1 - echo " -Xmx${MEM}g -Dspark.master=local[${THREADS}] \\" >> $1 + echo " -Xmx\${MEM}g -Dspark.master=local[$\{THREADS}] \\" >> $1 # echo ' -XX:+UseConcMarkSweepGC \' >> $1 echo -n ' -cp $JAR:' >> $1 echo -n $(cat cp.txt) >> $1 From f8326bfdb82ea46aa2420d1efc7f47c4640ba2b2 Mon Sep 17 00:00:00 2001 From: Ali Khan Date: Tue, 24 Sep 2024 20:55:46 -0400 Subject: [PATCH 5/6] update spark install --- spark_custom_install | 62 ++++---------------------------------------- 1 file changed, 5 insertions(+), 57 deletions(-) diff --git a/spark_custom_install b/spark_custom_install index 57771eb..afd1798 100755 --- a/spark_custom_install +++ b/spark_custom_install @@ -28,58 +28,6 @@ if [ $USERTHREADS == "-1" ]; then echo "You did not define the number of threads for Java/Spark can use, will be set automatically. You could do it by e.g.: './install -t 8' for 8 threads." fi -if [ $USERMEM == "-1" ]; then - echo "You did not define the memory that Java/Spark can use, will be set automatically. You could do it by e.g.: './install -m 64' for 64GB of RAM." -fi - - -if [ $USERMEM == "-1" ]; then - # check for operating system - if [[ "$OSTYPE" == "linux-gnu" ]]; then - echo "Assuming on Linux operating system" - MEM=$(cat /proc/meminfo | grep MemTotal | sed s/^MemTotal:\\\s*\\\|\\\s\\+[^\\\s]*$//g) - MEMGB=$(($MEM/1024/1024)) - MEM=$((($MEMGB/5)*4)) - elif [[ "$OSTYPE" == "darwin"* ]]; then - echo "Assuming on MacOS X operating system" - # sysctl returns total hardware memory size in bytes - MEM=$(sysctl hw.memsize | grep hw.memsize | sed s/hw.memsize://g) - MEMGB=$(($MEM/1024/1024/1024)) - MEM=$((($MEMGB/5)*4)) - else - echo "ERROR - Operating system (arg2) must be either linux or osx to determine max memory. Please specify max memory by e.g.: './install -m 64' for 64GB of RAM" - exit - fi - - echo "Available memory:" $MEMGB "GB, setting Java memory limit to" $MEM "GB" -else - MEM=$USERMEM - echo "Setting Java memory limit to" $MEM "GB" -fi - -if [ $USERTHREADS == "-1" ]; then - # check for operating system - if [[ "$OSTYPE" == "linux-gnu" ]]; then - if [ $USERMEM != "-1" ]; then - echo "Assuming on Linux operating system" - fi - THREADS=$(nproc --all) - elif [[ "$OSTYPE" == "darwin"* ]]; then - if [ $USERMEM != "-1" ]; then - echo "Assuming on MacOS X operating system" - fi - # sysctl returns number of physical cores - THREADS=$(sysctl -n hw.ncpu) - else - echo "ERROR - Operating system (arg2) must be either linux or osx to determine number of physical cores. Please specify physical by e.g.: './install -t 8' to use 8 threads." - exit - fi - echo "Available threads:" $THREADS", setting Java/Sparks threads accordingly." -else - THREADS=$USERTHREADS - echo "Setting Java/Spark number of threads to" $THREADS -fi - VERSION="0.0.2-SNAPSHOT" INSTALL_DIR=$(pwd) @@ -94,8 +42,8 @@ sleep 2 #mvn clean install #echo 'Building a farjar, which can also be used for cluster/cloud execution' -mvn clean install -P fatjar -Dmaven.repo.local=/opt/BigStitcher-Spark/repo -mvn -Dmdep.outputFile=cp.txt -Dmdep.includeScope=runtime dependency:build-classpath -Dmaven.repo.local=/opt/BigStitcher-Spark/repo +mvn clean install -P fatjar -gs /opt/BigStitcher-Spark/settings.xml +mvn -Dmdep.outputFile=cp.txt -Dmdep.includeScope=runtime dependency:build-classpath # function that installs one command # $1 - command name @@ -105,13 +53,13 @@ install_command () { echo '#!/bin/bash' > $1 echo '' >> $1 - echo 'MEM_GB=$1' >> $1 + echo 'if [ "$#" -lt 2 ]; then echo "Usage: $0 "; exit 1; fi' >> $1 + echo 'MEM=$1' >> $1 echo 'THREADS=$2' >> $1 echo 'shift 2' >> $1 echo "JAR=/opt/BigStitcher-Spark/repo/net/preibisch/BigStitcher-Spark/${VERSION}/BigStitcher-Spark-${VERSION}.jar" >> $1 echo 'java \' >> $1 - echo " -Xmx\${MEM}g -Dspark.master=local[$\{THREADS}] \\" >> $1 - # echo ' -XX:+UseConcMarkSweepGC \' >> $1 + echo ' -Xmx${MEM}g -Dspark.master=local[${THREADS}] \' >> $1 echo -n ' -cp $JAR:' >> $1 echo -n $(cat cp.txt) >> $1 echo ' \' >> $1 From 4631faa36cdbaab398a896a872191beb6d2b6fc4 Mon Sep 17 00:00:00 2001 From: Ali Khan Date: Tue, 24 Sep 2024 22:16:18 -0400 Subject: [PATCH 6/6] fix local repo spark --- spark_custom_install | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/spark_custom_install b/spark_custom_install index afd1798..24ec763 100755 --- a/spark_custom_install +++ b/spark_custom_install @@ -41,9 +41,10 @@ echo 'Building the code' sleep 2 #mvn clean install -#echo 'Building a farjar, which can also be used for cluster/cloud execution' -mvn clean install -P fatjar -gs /opt/BigStitcher-Spark/settings.xml -mvn -Dmdep.outputFile=cp.txt -Dmdep.includeScope=runtime dependency:build-classpath +#echo 'Building a fatjar, which can also be used for cluster/cloud execution' +mvn clean install -P fatjar -Dmaven.repo.local=/opt/BigStitcher-Spark/repo +mvn -Dmdep.outputFile=cp.txt -Dmdep.includeScope=runtime dependency:build-classpath -Dmaven.repo.local=/opt/BigStitcher-Spark/repo + # function that installs one command # $1 - command name