diff --git a/.github/workflows/make_release.yml b/.github/workflows/make_release.yml
new file mode 100644
index 0000000..4934b19
--- /dev/null
+++ b/.github/workflows/make_release.yml
@@ -0,0 +1,138 @@
+# Builds release binaries for Linux x86_64 and macOS (x86_64, arm64) in
+# containers, then publishes them as a GitHub release.
+name: Build binaries
+on:
+  push:
+    tags:
+      - "v*.*.*"
+    branches:
+      - themisto-release-testing
+
+jobs:
+  build_linux-x86_64:
+    runs-on: ubuntu-latest
+    container: phusion/holy-build-box-64:3.0.2
+    steps:
+      - name: Install wget
+        id: install-wget
+        run: yum install -y wget
+
+      # build.sh writes its tarball to /io; each step starts in a fresh shell,
+      # so only the directory creation carries over to later steps.
+      - name: Create io directory
+        id: mkdir-io
+        run: mkdir -p /io
+
+      - name: Download build script
+        id: dl-build-script
+        run: wget https://raw.githubusercontent.com/algbio/themisto/${{ github.ref_name }}/deploy/linux/build.sh
+
+      - name: Compile binary in Holy Build Box container
+        id: compile-in-container
+        run: chmod +x build.sh && ./build.sh ${{ github.ref_name }}
+
+      - name: Upload linux-x86_64 binary
+        if: success()
+        uses: actions/upload-artifact@v3
+        with:
+          name: themisto-${{ github.ref_name }}-x86_64-redhat-linux
+          path: /io/themisto-${{ github.ref_name }}-x86_64-redhat-linux.tar.gz
+
+  build_macOS-x86_64:
+    runs-on: ubuntu-latest
+    container: ghcr.io/shepherdjerred/macos-cross-compiler:latest
+    steps:
+      - name: Install wget
+        id: install-wget
+        run: apt-get update && apt-get install -y wget
+
+      - name: Create io directory
+        id: mkdir-io
+        run: mkdir -p /io
+
+      - name: Download x86_64 toolchain file
+        id: dl-toolchain-file
+        run: wget https://raw.githubusercontent.com/algbio/themisto/${{ github.ref_name }}/deploy/macOS/x86-64-toolchain_GNU.cmake && cp x86-64-toolchain_GNU.cmake /io/x86-64-toolchain_GNU.cmake && cp x86-64-toolchain_GNU.cmake /x86-64-toolchain_GNU.cmake
+
+      - name: Download build script
+        id: dl-build-script
+        run: wget https://raw.githubusercontent.com/algbio/themisto/${{ github.ref_name }}/deploy/macOS/build.sh
+
+      - name: Compile binary in macOS Cross Compiler container
+        id: compile-in-container
+        run: chmod +x build.sh && ./build.sh ${{ github.ref_name }} x86-64
+
+      - name: Upload macOS-x86_64 binary
+        if: success()
+        uses: actions/upload-artifact@v3
+        with:
+          name: themisto-${{ github.ref_name }}-x86_64-apple-darwin22
+          path: /io/themisto-${{ github.ref_name }}-x86_64-apple-darwin22.tar.gz
+
+  build_macOS-arm64:
+    runs-on: ubuntu-latest
+    container: ghcr.io/shepherdjerred/macos-cross-compiler:latest
+    steps:
+      - name: Install wget
+        id: install-wget
+        run: apt-get update && apt-get install -y wget
+
+      - name: Create io directory
+        id: mkdir-io
+        run: mkdir -p /io
+
+      - name: Download arm64 toolchain file
+        id: dl-toolchain-file
+        run: wget https://raw.githubusercontent.com/algbio/themisto/${{ github.ref_name }}/deploy/macOS/arm64-toolchain_GNU.cmake && cp arm64-toolchain_GNU.cmake /io/arm64-toolchain_GNU.cmake && cp arm64-toolchain_GNU.cmake /arm64-toolchain_GNU.cmake
+
+      - name: Download build script
+        id: dl-build-script
+        run: wget https://raw.githubusercontent.com/algbio/themisto/${{ github.ref_name }}/deploy/macOS/build.sh
+
+      - name: Compile binary in macOS Cross Compiler container
+        id: compile-in-container
+        run: chmod +x build.sh && ./build.sh ${{ github.ref_name }} arm64
+
+      - name: Upload macOS-arm64 binary
+        if: success()
+        uses: actions/upload-artifact@v3
+        with:
+          name: themisto-${{ github.ref_name }}-aarch64-apple-darwin22
+          path: /io/themisto-${{ github.ref_name }}-aarch64-apple-darwin22.tar.gz
+
+  create-release:
+    runs-on: ubuntu-latest
+
+    needs: [ build_linux-x86_64, build_macOS-x86_64, build_macOS-arm64 ]
+
+    # Creating a release needs write access to repository contents.
+    permissions:
+      contents: write
+
+    steps:
+      - uses: actions/checkout@v2
+
+      # Without a `name`, every artifact is downloaded into its own
+      # subdirectory of `build/`, named after the artifact.
+      - uses: actions/download-artifact@v2
+        with:
+          path: build
+
+      - name: Organise files
+        shell: bash
+        run: |
+          cp build/themisto-${{ github.ref_name }}-aarch64-apple-darwin22/themisto-${{ github.ref_name }}-aarch64-apple-darwin22.tar.gz .
+          cp build/themisto-${{ github.ref_name }}-x86_64-apple-darwin22/themisto-${{ github.ref_name }}-x86_64-apple-darwin22.tar.gz .
+          cp build/themisto-${{ github.ref_name }}-x86_64-redhat-linux/themisto-${{ github.ref_name }}-x86_64-redhat-linux.tar.gz .
+
+      - name: Create release
+        id: create_release
+        uses: softprops/action-gh-release@v1
+        with:
+          name: Release ${{ github.ref_name }}
+          draft: false
+          prerelease: false
+          fail_on_unmatched_files: true
+          generate_release_notes: true
+          files: |
+            themisto-*.tar.gz
diff --git a/README.md b/README.md
index abd298e..1eb50f4 100644
--- a/README.md
+++ b/README.md
@@ -6,11 +6,18 @@ Themisto version 3.2.2 is out, fixing a bug where the output was sometimes not f
 # About Themisto
 Themisto is a succinct colored k-mer index supporting pseudo-alignment against a database of reference sequences similar to the tool Kallisto, Bifrost and Metagraph. For more information, see the [preprint](https://www.biorxiv.org/content/10.1101/2023.02.24.529942v3). This software is currently developed by the [Compressed Data Structures group](https://www.helsinki.fi/en/researchgroups/algorithmic-bioinformatics/teams/compressed-data-structures) at the University of Helsinki.
 
-## Requirements
+## Installation
+Precompiled binaries are available for
+- Linux x86_64
+- macOS arm64
+- macOS x86_64
 
-We currently support only Linux and macOS. For compilation, you will need a C++20 compliant compiler with OpenMP support, CMake v3.1 or newer, and [Rust](https://www.rust-lang.org/tools/install) 1.77. If compiling with g++, make sure that the version is at least g++-10, or you might run into compilation errors with the standard library <filesystem> header.
+Visit the [Releases page](https://github.com/algbio/themisto/releases) to download a binary.
 
 ## Compiling
+### Requirements
+
+We currently support only Linux and macOS. For compilation, you will need a C++20 compliant compiler with OpenMP support, CMake v3.1 or newer, and [Rust](https://www.rust-lang.org/tools/install) 1.77. If compiling with g++, make sure that the version is at least g++-10, or you might run into compilation errors with the standard library <filesystem> header.
 
 ### Linux
 
diff --git a/deploy/linux/build.sh b/deploy/linux/build.sh
new file mode 100755
index 0000000..08d190e
--- /dev/null
+++ b/deploy/linux/build.sh
@@ -0,0 +1,83 @@
+#!/bin/bash
+## Build script for compiling Themisto for Linux x86-64.
+## Call this from `compile.sh` with the desired tag as the argument.
+## Adapted from the scripts in https://github.com/tmaklin/biobins
+
+set -e
+
+VER=$1
+if [[ -z $VER ]]; then
+  echo "Error: specify version" >&2
+  exit 1
+fi
+
+mkdir /io/tmp && cd /io/tmp
+
+## Install gcc-10 (devtoolset-10)
+yum -y install devtoolset-10-*
+yum -y update
+
+## Change hbb environment to use gcc-10
+sed 's/DEVTOOLSET_VERSION=9/DEVTOOLSET_VERSION=10/g' /hbb/activate_func.sh > /hbb/activate_func_10.sh
+mv --force /hbb/activate_func_10.sh /hbb/activate_func.sh
+
+# Activate Holy Build Box environment.
+source /hbb_exe/activate
+export LDFLAGS="-L/lib64 -static-libstdc++"
+set -x
+
+## Setup paths so cmake finds the correct toolchain
+export CC="/opt/rh/devtoolset-10/root/usr/bin/gcc"
+export CXX="/opt/rh/devtoolset-10/root/usr/bin/g++"
+
+yum -y install curl libcurl-devel
+yum -y install git
+
+## Overwrite HBB git which doesn't support https
+rm --force /hbb/bin/git
+ln -s /usr/bin/git /hbb/bin/git
+
+## Setup rust
+curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs > rustup.sh
+chmod +x rustup.sh
+export CARGO_HOME="/.cargo"
+export RUSTUP_HOME="/.rustup"
+./rustup.sh -y --default-toolchain stable --profile minimal
+. "/.cargo/env"
+rustup target add x86_64-unknown-linux-gnu
+
+## Clone Themisto
+git clone https://github.com/algbio/Themisto
+cd Themisto
+git checkout "${VER}"
+git submodule update --init --recursive
+
+## Specify ggcat target architectures
+mkdir -p ggcat/.cargo
+echo "[build]" >> ggcat/.cargo/config.toml
+echo "target = \"x86_64-unknown-linux-gnu\"" >> ggcat/.cargo/config.toml
+sed 's/target\/release/target\/x86_64-unknown-linux-gnu\/release/g' ggcat/crates/capi/ggcat-cpp-api/Makefile | sed 's/fPIE/fPIE -march=x86-64 -mtune=generic -m64 -fPIC/g' > Makefile.tmp
+mv Makefile.tmp ggcat/crates/capi/ggcat-cpp-api/Makefile
+
+cd build
+cmake -DCMAKE_C_FLAGS="-march=x86-64 -mtune=generic -m64" \
+      -DCMAKE_CXX_FLAGS="-march=x86-64 -mtune=generic -m64" \
+      -DCMAKE_BUILD_TYPE=Release \
+      -DROARING_DISABLE_NATIVE=ON \
+      -DMAX_KMER_LENGTH=31 ..
+
+## Limit parallel jobs to the core count; a bare -j places no limit on them
+make VERBOSE=1 -j"$(nproc)"
+
+## Gather distributable
+target=themisto-${VER}-$(gcc -v 2>&1 | grep "^Target" | cut -f2 -d':' | sed 's/[[:space:]]*//g')
+path=/io/tmp/$target
+mkdir "$path"
+cp ../build/bin/themisto "$path"/
+cp ../README.md "$path"/
+cp ../LICENSE.txt "$path"/
+cd /io/tmp
+tar -zcvf "$target.tar.gz" "$target"
+mv "$target.tar.gz" /io/
+cd /io/
+rm -rf tmp cache
diff --git a/deploy/linux/compile.sh b/deploy/linux/compile.sh
new file mode 100755
index 0000000..189a869
--- /dev/null
+++ b/deploy/linux/compile.sh
@@ -0,0 +1,14 @@
+#!/bin/bash
+## Runs `build.sh` from the current directory inside the Holy Build Box
+## container; the current directory is mounted at /io.
+##
+## Arguments
+## 1: version (tag) to build, passed through to build.sh
+
+docker run -t -i --rm \
+  -v "$(pwd)":/io \
+  -e CC='ccache gcc' \
+  -e CXX='ccache g++' \
+  -e CCACHE_DIR='/io/cache' \
+  phusion/holy-build-box-64:3.0.2 \
+  bash /io/build.sh "$1"
diff --git a/deploy/macOS/arm64-toolchain_GNU.cmake b/deploy/macOS/arm64-toolchain_GNU.cmake
new file mode 100644
index 0000000..9ab98f5
--- /dev/null
+++ b/deploy/macOS/arm64-toolchain_GNU.cmake
@@ -0,0 +1,27 @@
+# CMake toolchain file for cross-compiling to aarch64-apple-darwin22 with the
+# GCC, cctools and LLVM binaries at the paths set below.
+# NOTE(review): CMAKE_SYSTEM_PROCESSOR is X86 here, same as in the x86-64
+# toolchain file, although the target is aarch64 -- confirm this is intended.
+set(CMAKE_SYSTEM_NAME Generic)
+set(CMAKE_SYSTEM_PROCESSOR X86)
+
+set(TOOLCHAIN_NAME aarch64-apple-darwin22)
+set(COMPILER_PATH "/gcc/bin")
+set(TOOLCHAIN_PATH "/cctools/bin")
+set(LLVM_PATH "/usr/lib/llvm-14/bin")
+
+set(CMAKE_C_COMPILER "${COMPILER_PATH}/${TOOLCHAIN_NAME}-gcc")
+set(CMAKE_CXX_COMPILER "${COMPILER_PATH}/${TOOLCHAIN_NAME}-g++")
+set(CMAKE_ASM_COMPILER "${COMPILER_PATH}/${TOOLCHAIN_NAME}-gcc")
+set(CMAKE_RC_COMPILER "${LLVM_PATH}/llvm-rc")
+set(CMAKE_LINKER "${TOOLCHAIN_PATH}/${TOOLCHAIN_NAME}-ld")
+set(CMAKE_ADDR2LINE "${LLVM_PATH}/llvm-addr2line")
+set(CMAKE_AR "${COMPILER_PATH}/${TOOLCHAIN_NAME}-gcc-ar")
+set(CMAKE_DLLTOOL "${LLVM_PATH}/llvm-dlltool")
+set(CMAKE_MT "${LLVM_PATH}/llvm-mt")
+set(CMAKE_NM "${COMPILER_PATH}/${TOOLCHAIN_NAME}-gcc-nm")
+set(CMAKE_OBJCOPY "${LLVM_PATH}/llvm-objcopy")
+set(CMAKE_OBJDUMP "${LLVM_PATH}/llvm-objdump")
+set(CMAKE_RANLIB "${COMPILER_PATH}/${TOOLCHAIN_NAME}-gcc-ranlib")
+set(CMAKE_READELF "${LLVM_PATH}/llvm-readelf")
+set(CMAKE_STRIP "${TOOLCHAIN_PATH}/${TOOLCHAIN_NAME}-strip")
diff --git a/deploy/macOS/build.sh b/deploy/macOS/build.sh
new file mode 100755
index 0000000..64c601f
--- /dev/null
+++ b/deploy/macOS/build.sh
@@ -0,0 +1,131 @@
+#!/bin/bash
+## Build script for cross-compiling Themisto for macOS x86-64 or arm64.
+## Call this from `compile_in_docker.sh` unless you know what you're doing.
+##
+## Arguments
+## 1: version (tag) to check out and build
+## 2: architecture (one of x86-64,arm64)
+
+set -exo pipefail
+
+VER=$1
+if [[ -z $VER ]]; then
+  echo "Error: specify version" >&2
+  exit 1
+fi
+
+ARCH=$2
+if [[ -z $ARCH ]]; then
+  echo "Error: specify architecture (one of x86-64,arm64)" >&2
+  exit 1
+fi
+
+apt update
+apt install -y cmake git libomp5 libomp-dev curl
+
+rustup default stable
+
+mkdir /io/tmp
+cd /io/tmp
+
+# Extract and enter source
+git clone https://github.com/algbio/Themisto.git
+cd Themisto
+git checkout "${VER}"
+git submodule update --init --recursive
+
+mkdir -p ggcat/.cargo
+
+cd build
+target_arch=""
+if [ "$ARCH" = "x86-64" ]; then
+  # Rust toolchain
+  rustup target add x86_64-apple-darwin
+
+  echo "[build]" >> ../ggcat/.cargo/config.toml
+  echo "target = \"x86_64-apple-darwin\"" >> ../ggcat/.cargo/config.toml
+  echo "[target.x86_64-apple-darwin]" >> ../ggcat/.cargo/config.toml
+  echo "linker = \"x86_64-apple-darwin22-gcc\"" >> ../ggcat/.cargo/config.toml
+
+  export CC="x86_64-apple-darwin22-gcc"
+  export CXX="x86_64-apple-darwin22-g++"
+
+  ## Setup ggcat-cpp-api cargo config files for cross compilation
+  sed "s/cargo build/RUSTFLAGS='-L \/osxcross\/SDK\/MacOSX13.0.sdk\/usr\/lib' cargo build --target x86_64-apple-darwin/g" ../ggcat/crates/capi/ggcat-cpp-api/Makefile | sed 's/target\/release/target\/x86_64-apple-darwin\/release/g' | sed 's/fPIE/fPIE -march=x86-64 -mtune=generic -m64 -fPIC/g' | sed 's/ar cr/\/gcc\/bin\/x86_64-apple-darwin22-gcc-ar cr/g' > Makefile.tmp
+  mv Makefile.tmp ../ggcat/crates/capi/ggcat-cpp-api/Makefile
+
+  ## Prevent sdsl-lite from building with native instructions
+  sed 's/-march=native/-march=x86-64/g' ../SBWT/sdsl-lite/CMakeLists.txt > CMakeLists.txt.tmp
+  mv CMakeLists.txt.tmp ../SBWT/sdsl-lite/CMakeLists.txt
+
+  # compile x86_64
+  cmake -DCMAKE_TOOLCHAIN_FILE="/io/$ARCH-toolchain_GNU.cmake" \
+    -DCMAKE_C_FLAGS="-march=$ARCH -mtune=generic -m64 -fPIC -fPIE" \
+    -DCMAKE_CXX_FLAGS="-march=$ARCH -mtune=generic -m64 -fPIC -fPIE" \
+    -DROARING_DISABLE_NATIVE=ON \
+    -DBZIP2_LIBRARIES="/osxcross/SDK/MacOSX13.0.sdk/usr/lib/libbz2.tbd" -DBZIP2_INCLUDE_DIR="/osxcross/SDK/MacOSX13.0.sdk/usr/include" \
+    -DZLIB="/osxcross/SDK/MacOSX13.0.sdk/usr/lib/libz.tbd" -DZLIB_INCLUDE_DIR="/osxcross/SDK/MacOSX13.0.sdk/usr/include" \
+    -DZLIB_LIBRARY="/osxcross/SDK/MacOSX13.0.sdk/usr/lib/libz.tbd" \
+    -DMAX_KMER_LENGTH=31 \
+    ..
+  target_arch="x86_64-apple-darwin22"
+elif [ "$ARCH" = "arm64" ]; then
+  # Rust toolchain
+  rustup target add aarch64-apple-darwin
+
+  echo "[build]" >> ../ggcat/.cargo/config.toml
+  echo "target = \"aarch64-apple-darwin\"" >> ../ggcat/.cargo/config.toml
+  echo "[target.aarch64-apple-darwin]" >> ../ggcat/.cargo/config.toml
+  echo "linker = \"aarch64-apple-darwin22-gcc\"" >> ../ggcat/.cargo/config.toml
+
+  export CC="aarch64-apple-darwin22-gcc"
+  export CXX="aarch64-apple-darwin22-g++"
+
+  ## Prevent KMC from compiling with x86 instructions
+  sed 's/^UNAME_S.*$/UNAME_S=Darwin/g' ../SBWT/KMC/Makefile | sed 's/^UNAME_M.*$/UNAME_M=arm64/g' | sed 's/^[[:space:]]*CC[[:space:]]*=.*$//g' > Makefile.tmp
+  sed 's/if(APPLE)/if(TRUE)/g' ../SBWT/KMC/CMakeLists.txt | sed 's/CMAKE_SYSTEM_PROCESSOR MATCHES "arm64"/TRUE/g' > CMakeLists.txt.tmp
+  mv Makefile.tmp ../SBWT/KMC/Makefile
+  mv CMakeLists.txt.tmp ../SBWT/KMC/CMakeLists.txt
+
+  ## Prevent sdsl-lite from building with native instructions
+  sed 's/-msse4.2[[:space:]]*-march=native/-march=armv8-a/g' ../SBWT/sdsl-lite/CMakeLists.txt > CMakeLists.txt.tmp
+  mv CMakeLists.txt.tmp ../SBWT/sdsl-lite/CMakeLists.txt
+
+  ## Setup ggcat-cpp-api cargo config files for cross compilation
+  sed "s/cargo build/RUSTFLAGS='-L \/osxcross\/SDK\/MacOSX13.0.sdk\/usr\/lib' cargo build --target aarch64-apple-darwin/g" ../ggcat/crates/capi/ggcat-cpp-api/Makefile | sed 's/target\/release/target\/aarch64-apple-darwin\/release/g' | sed 's/fPIE/fPIE -march=armv8-a -mtune=generic -m64 -fPIC/g' | sed 's/ar cr/\/gcc\/bin\/aarch64-apple-darwin22-gcc-ar cr/g' > Makefile.tmp
+  mv Makefile.tmp ../ggcat/crates/capi/ggcat-cpp-api/Makefile
+
+  # compile aarch64
+  cmake -DCMAKE_TOOLCHAIN_FILE="/io/$ARCH-toolchain_GNU.cmake" \
+    -DCMAKE_C_FLAGS="-march=armv8-a -mtune=generic -m64 -fPIC -fPIE" \
+    -DCMAKE_CXX_FLAGS="-march=armv8-a -mtune=generic -m64 -fPIC -fPIE" \
+    -DAPPLE=1 \
+    -DCMAKE_SYSTEM_PROCESSOR="arm64" \
+    -DROARING_DISABLE_NATIVE=ON \
+    -DBZIP2_LIBRARIES="/osxcross/SDK/MacOSX13.0.sdk/usr/lib/libbz2.tbd" -DBZIP2_INCLUDE_DIR="/osxcross/SDK/MacOSX13.0.sdk/usr/include" \
+    -DZLIB="/osxcross/SDK/MacOSX13.0.sdk/usr/lib/libz.tbd" -DZLIB_INCLUDE_DIR="/osxcross/SDK/MacOSX13.0.sdk/usr/include" \
+    -DZLIB_LIBRARY="/osxcross/SDK/MacOSX13.0.sdk/usr/lib/libz.tbd" \
+    -DMAX_KMER_LENGTH=31 \
+    ..
+  target_arch="aarch64-apple-darwin22"
+else
+  echo "Error: unsupported architecture '$ARCH' (one of x86-64,arm64)" >&2
+  exit 1
+fi
+
+## Limit parallel jobs to the core count; a bare -j places no limit on them
+make VERBOSE=1 -j"$(nproc)"
+
+## gather the stuff to distribute
+target=themisto-${VER}-$target_arch
+path=/io/tmp/$target
+mkdir "$path"
+cp ../build/bin/themisto "$path"/
+cp ../README.md "$path"/
+cp ../LICENSE.txt "$path"/
+cd /io/tmp
+tar -zcvf "$target.tar.gz" "$target"
+mv "$target.tar.gz" /io/
+cd /io/
+rm -rf tmp cache
+
diff --git a/deploy/macOS/compile_in_docker.sh b/deploy/macOS/compile_in_docker.sh
new file mode 100755
index 0000000..03398a7
--- /dev/null
+++ b/deploy/macOS/compile_in_docker.sh
@@ -0,0 +1,37 @@
+#!/bin/bash
+## Wrapper for calling the build script `build.sh` inside the
+## macos-cross-compiler docker container.
+#
+# Arguments
+## 1: version number to build (checked out from the git source tree)
+## 2: architecture (one of x86-64,arm64)
+
+set -eo pipefail
+
+VER=$1
+if [[ -z $VER ]]; then
+  echo "Error: specify version as argument 1" >&2
+  exit 1
+fi
+
+ARCH=$2
+if [[ -z $ARCH ]]; then
+  echo "Error: specify architecture (one of x86-64,arm64) as argument 2" >&2
+  exit 1
+fi
+
+set -ux
+
+## The toolchain file is copied in from the parent directory so that it is
+## visible inside the container as /io/<arch>-toolchain_GNU.cmake.
+cp "../$ARCH-toolchain_GNU.cmake" ./
+
+docker run \
+  -v "$(pwd)":/io \
+  --rm \
+  -it \
+  ghcr.io/shepherdjerred/macos-cross-compiler:latest \
+  /bin/bash /io/build.sh "$VER" "$ARCH"
+
+rm "$ARCH-toolchain_GNU.cmake"
+
diff --git a/deploy/macOS/x86-64-toolchain_GNU.cmake b/deploy/macOS/x86-64-toolchain_GNU.cmake
new file mode 100644
index 0000000..ae9a57e
--- /dev/null
+++ b/deploy/macOS/x86-64-toolchain_GNU.cmake
@@ -0,0 +1,25 @@
+# CMake toolchain file for cross-compiling to x86_64-apple-darwin22 with the
+# GCC, cctools and LLVM binaries at the paths set below.
+set(CMAKE_SYSTEM_NAME Generic)
+set(CMAKE_SYSTEM_PROCESSOR X86)
+
+set(TOOLCHAIN_NAME x86_64-apple-darwin22)
+set(COMPILER_PATH "/gcc/bin")
+set(TOOLCHAIN_PATH "/cctools/bin")
+set(LLVM_PATH "/usr/lib/llvm-14/bin")
+
+set(CMAKE_C_COMPILER "${COMPILER_PATH}/${TOOLCHAIN_NAME}-gcc")
+set(CMAKE_CXX_COMPILER "${COMPILER_PATH}/${TOOLCHAIN_NAME}-g++")
+set(CMAKE_ASM_COMPILER "${COMPILER_PATH}/${TOOLCHAIN_NAME}-gcc")
+set(CMAKE_RC_COMPILER "${LLVM_PATH}/llvm-rc")
+set(CMAKE_LINKER "${TOOLCHAIN_PATH}/${TOOLCHAIN_NAME}-ld")
+set(CMAKE_ADDR2LINE "${LLVM_PATH}/llvm-addr2line")
+set(CMAKE_AR "${COMPILER_PATH}/${TOOLCHAIN_NAME}-gcc-ar")
+set(CMAKE_DLLTOOL "${LLVM_PATH}/llvm-dlltool")
+set(CMAKE_MT "${LLVM_PATH}/llvm-mt")
+set(CMAKE_NM "${COMPILER_PATH}/${TOOLCHAIN_NAME}-gcc-nm")
+set(CMAKE_OBJCOPY "${LLVM_PATH}/llvm-objcopy")
+set(CMAKE_OBJDUMP "${LLVM_PATH}/llvm-objdump")
+set(CMAKE_RANLIB "${COMPILER_PATH}/${TOOLCHAIN_NAME}-gcc-ranlib")
+set(CMAKE_READELF "${LLVM_PATH}/llvm-readelf")
+set(CMAKE_STRIP "${TOOLCHAIN_PATH}/${TOOLCHAIN_NAME}-strip")