Skip to content

Commit

Permalink
Squashed 'lib/mmseqs/' changes from bad16c765..6a0dcee42
Browse files Browse the repository at this point in the history
6a0dcee42 Update Regression test
f92447d04 Rename slice to exhaustive search, add filterresult
6c2fefceb Set pca to 0.0 in expand2profile
0cc7e6748 Add unpackdb to split a database into separate files #406
877344c38 Add USE_SYSTEM_ZSTD cmake flag to use system provided zstd #411
bbd564172 Replace throw with abort in ALP again
46c26ce99 Add missing licenses and readmes for code in lib #403
20543e0aa Update ALP to 1.98 and add readme/license
d5717e821 Add CDD to databases downloader #410
04b27f987 msa2profile always copies lookup/source files instead of linking them to be independent from the MSA db
2d83f5171 msa2profile/result can skip the first sequence
242a8fafe Pass threads to tar2db in databases workflow
a19f5a526 Allow clustering of clustering input with set-cover or connected-component by ignoring scores/weight
39a414033 Don't set INT_MAX as --max-seqs in slice search to avoid huge allocations in prefilter
9290a2b52 Allow sequence database input in taxonomyreport #408
aaba0c7f2 Short circuit cluster-reassign if nothing can be reassigned
3822a8f56 Fix tmp files not getting removed in linclust/cluster with --remove-tmp-files
2a35e025a Fix kmermatcher setting user k-mer pattern in auto k-mer selection and breaking
a1050359f Rename accelerated 2bLCA to approximate 2bLCA to be consistent with manuscript
11698a5b4 Rename LICENCE to LICENSE soedinglab/MMseqs2#402
0828d8653 Allow result database input in taxonomyreport #401
b31ebb64a Krona taxonomy report was not working if no sequence was unclassified
9f0fb3ed6 Cleanup taxonomyreport
a2d9568d9 Fix wrong azure dependency
b1367fc26 Make resultToBuffer buffer sizes consistent (needs further refactoring)
98f9939d0 Get rid of results temporary array in msa2result
d495e0e9f Replace texlive with tectonic for userguide building
e03b52576 Fix MMseqs2 Taxonomy citation
602689c11 Update examples in mmseqs (easy-)taxonomy invocation
ecf152cf7 Improve (easy-)taxonomy description text by reordering parameters by importance
e0b044341 Improve description of --orf-filter
a7f91d46f Add warning if cluster or prefilter input is used in majoritylca with invalid --vote-mode
a3399397d Update regression to include recent speedup
d5da12d7a Add GTDB to databases downloader
83780f4ce Respect verbosity for rmdb calls in databases
9011c15d6 Improve output of databases list
86c03fd42 Increase buffer sizes in tar2db
2bd03c689 Fix tar directory (symlink, etc) entries causing tar2db to stop early
7bdb222da Use DBWriter to write .lookup multi-threaded in tar2db
23c9e1e75 Don't use multiple threads in tar2db when reading .tar.gz/.tgz as nearly all the time is spent inside zlib
2e128d4f8 Increase zlib buffer in tar2db to speedup reading
c19118935 Fix multiple locations where Util::checkAllocation would never be called as the preceding allocation would already terminate on failure
1f302134a Fix two compilation failures revealed by Debian
5b03cdff7 Another instance of the same warning
3fda449b6 Fix compile warning
3b0197afa Encode species names in taxonomy blocklist to make sure we don't block random nodes in non-NCBI taxonomies (e.g. GTDB)
ab2426f89 Fix String MultiParameter (e.g. sub matrices) breaking if filenames contain whitespaces
e8de35071 Encode whitespace containing parameters as base64 to better deal with shell word splitting in workflows
c7a7c366f Add instructions to simd.h
6672bbc9d Fix missing newline in log message
84034a527 Remove useless taxonomy ancestor warning
6609c6cd5 Fix invalid taxonomy output mode being set
441c52cf5 Fix taxpercontig not working with easy-taxonomy
4ce381092 lca is not computed by easy-taxonomy anymore
9d631c16a Fix cleanup of taxonomy intermediate files
d0f596f56 taxonomyreport and addtaxonomy output is now adjustable in easy-taxonomy
6bfd08d54 Cleanup default set parameters in easy-taxonomy
afcade163 Improve default taxonomy parameter lists shown (without -h)
fc126b3e1 Improve error messages when something is wrong with the input/output paths
3b49310f5 Improve unrecognized parameter message
83b9e9a18 Remove useless missing tmp dir warning
d0a9b79f1 Fix typo
48f9737a8 Add ORF filter parameters only to taxonomy for now
a60689753 Disable unfinished ORF filter in search
336d9d04b Add taxonomy citation
f7fde6fef Reduce binary taxonomy dump memory requirements slightly
eff61cfef Add \0 byte after serialization
7e63e1ea7 Fix typo in Parameters.h
019de271a Add vector of predefined substitution matrices
34b3a5396 Merge pull request #389 from mr-c/simde_v0.7.0
74724b3ae Cleanup headers in kmermatcher
73fd5cfa6 Update xxhash to v0.8.0
8dd192c00 Don't create false _has_{builtin,attribute}
062ef9953 Merge commit 'c2d60348af5c036eb2cbc7974d84065e16ab4096' into simde_v0.7.0
c2d60348a Squashed 'lib/simde/simde/' changes from f2257f11..b6c9c964

git-subtree-dir: lib/mmseqs
git-subtree-split: 6a0dcee42f77d10756cd03dee095b7aaafb5a444
  • Loading branch information
RuoshiZhang committed Feb 16, 2021
1 parent 34934fa commit 1b66171
Show file tree
Hide file tree
Showing 213 changed files with 25,931 additions and 5,623 deletions.
47 changes: 29 additions & 18 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ set(HAVE_POWER9 0 CACHE BOOL "Have POWER9 CPU")
set(HAVE_POWER8 0 CACHE BOOL "Have POWER8 CPU")
set(HAVE_ARM8 0 CACHE BOOL "Have ARMv8 CPU")
set(NATIVE_ARCH 1 CACHE BOOL "Assume native architecture for SIMD. Use one of the HAVE_* options or set CMAKE_CXX_FLAGS to the appropriate flags if you disable this.")
set(USE_SYSTEM_ZSTD 0 CACHE BOOL "Use zstd provided by system instead of bundled version")

if (HAVE_SANITIZER)
include(FindUBSan)
Expand Down Expand Up @@ -81,6 +82,8 @@ if (NATIVE_ARCH AND (MMSEQS_ARCH STREQUAL ""))
set(X64 1)
elseif (CMAKE_SYSTEM_PROCESSOR MATCHES "x86|X86")
set(X86 1)
elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^sparc")
set(SPARC 1)
else ()
message(WARNING "CPU without native SIMD instructions. Performance will be bad.")
endif ()
Expand Down Expand Up @@ -151,7 +154,7 @@ if ((CMAKE_CXX_COMPILER_ID STREQUAL "Clang" AND CMAKE_CXX_COMPILER_VERSION VERSI
set(DISABLE_IPS4O 1)
endif ()

if (PPC64)
if (PPC64 OR SPARC)
# FIXME: investigate why on ppc the regression seems to fail randomly
set(DISABLE_IPS4O 1)
endif ()
Expand All @@ -165,23 +168,31 @@ if (CMAKE_COMPILER_IS_CLANG AND (NOT EMSCRIPTEN))
set(MMSEQS_CXX_FLAGS "${MMSEQS_CXX_FLAGS} -stdlib=libc++")
endif ()


# zstd
# We use ZSTD_findDecompressedSize which is only available with ZSTD_STATIC_LINKING_ONLY
# Thus we cannot use a system provided libzstd
set(ZSTD_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/lib/zstd")
set(CMAKE_INSTALL_LIBDIR bin)
set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_CURRENT_SOURCE_DIR}/lib/zstd/build/cmake/CMakeModules")
option(ZSTD_LEGACY_SUPPORT "LEGACY SUPPORT" OFF)
option(ZSTD_BUILD_STATIC "BUILD STATIC LIBRARIES" ON)
option(ZSTD_BUILD_SHARED "BUILD SHARED LIBRARIES" OFF)
option(ZSTD_MULTITHREAD_SUPPORT "MULTITHREADING SUPPORT" OFF)
option(ZSTD_BUILD_PROGRAMS "BUILD PROGRAMS" OFF)
option(ZSTD_BUILD_CONTRIB "BUILD CONTRIB" OFF)
option(ZSTD_BUILD_TESTS "BUILD TESTS" OFF)
include_directories(lib/zstd/lib)
add_subdirectory(lib/zstd/build/cmake/lib EXCLUDE_FROM_ALL)
set_target_properties(libzstd_static PROPERTIES COMPILE_FLAGS "${MMSEQS_C_FLAGS}" LINK_FLAGS "${MMSEQS_C_FLAGS}")
# zstd: use either a static libzstd provided by the system (USE_SYSTEM_ZSTD)
# or the bundled copy in lib/zstd. Both branches leave the thing to link
# against in ZSTD_LIBRARIES and add the zstd headers to the include path.
# We use ZSTD_findDecompressedSize which is only available with ZSTD_STATIC_LINKING_ONLY,
# which is why only static library names are searched for in the system case.
if (USE_SYSTEM_ZSTD)
    include(FindPackageHandleStandardArgs)
    # The REQUIRED keyword of find_path()/find_library() only exists since
    # CMake 3.18, so it is not used here; a missing zstd is turned into a hard
    # error explicitly below, independent of the CMake version.
    find_path(ZSTD_INCLUDE_DIRS NAMES zstd.h)
    find_library(ZSTD_LIBRARIES NAMES libzstd.a libzstd_static)
    # Sets ZSTD_FOUND and prints the standard found / not-found message.
    # It does not abort by itself because ZSTD_FIND_REQUIRED is not set
    # outside of a find_package() call.
    find_package_handle_standard_args(ZSTD DEFAULT_MSG ZSTD_LIBRARIES ZSTD_INCLUDE_DIRS)
    if (NOT ZSTD_FOUND)
        message(FATAL_ERROR "USE_SYSTEM_ZSTD is enabled, but zstd.h and a static libzstd (libzstd.a) could not both be found.")
    endif ()
    mark_as_advanced(ZSTD_LIBRARIES ZSTD_INCLUDE_DIRS)
    include_directories(${ZSTD_INCLUDE_DIRS})
else ()
    set(ZSTD_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/lib/zstd")
    # NOTE(review): presumably redirects where the zstd subproject would install
    # its library; confirm before changing.
    set(CMAKE_INSTALL_LIBDIR bin)
    list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/lib/zstd/build/cmake/CMakeModules")
    # Defaults for the bundled build: static library only, everything optional off.
    # NOTE(review): these are presumably read by the zstd CMake project added below.
    option(ZSTD_LEGACY_SUPPORT "LEGACY SUPPORT" OFF)
    option(ZSTD_BUILD_STATIC "BUILD STATIC LIBRARIES" ON)
    option(ZSTD_BUILD_SHARED "BUILD SHARED LIBRARIES" OFF)
    option(ZSTD_MULTITHREAD_SUPPORT "MULTITHREADING SUPPORT" OFF)
    option(ZSTD_BUILD_PROGRAMS "BUILD PROGRAMS" OFF)
    option(ZSTD_BUILD_CONTRIB "BUILD CONTRIB" OFF)
    option(ZSTD_BUILD_TESTS "BUILD TESTS" OFF)
    include_directories(lib/zstd/lib)
    # EXCLUDE_FROM_ALL: the zstd targets are only built when something depends on them.
    add_subdirectory(lib/zstd/build/cmake/lib EXCLUDE_FROM_ALL)
    # Build the bundled zstd with the same C flags as the rest of the project.
    set_target_properties(libzstd_static PROPERTIES COMPILE_FLAGS "${MMSEQS_C_FLAGS}" LINK_FLAGS "${MMSEQS_C_FLAGS}")
    set(ZSTD_LIBRARIES libzstd_static)
endif ()

# tinyexpr
include_directories(lib/tinyexpr)
Expand Down
File renamed without changes.
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@ MMseqs2 (Many-against-Many sequence searching) is a software suite to search and

[Mirdita M, Steinegger M and Soeding J. MMseqs2 desktop and local web server app for fast, interactive sequence searches. Bioinformatics, doi: 10.1093/bioinformatics/bty1057 (2019)](https://academic.oup.com/bioinformatics/article/35/16/2856/5280135).

[Mirdita M, Steinegger M, Breitwieser F, Soeding J, Levy Karin E: Fast and sensitive taxonomic assignment to metagenomic contigs. bioRxiv, doi: 10.1101/2020.11.27.401018 (2020)](https://www.biorxiv.org/content/10.1101/2020.11.27.401018v1).

[![BioConda Install](https://img.shields.io/conda/dn/bioconda/mmseqs2.svg?style=flag&label=BioConda%20install)](https://anaconda.org/bioconda/mmseqs2)
[![Github All Releases](https://img.shields.io/github/downloads/soedinglab/mmseqs2/total.svg)](https://github.com/soedinglab/mmseqs2/releases/latest)
[![Biocontainer Pulls](https://img.shields.io/endpoint?url=https%3A%2F%2Fmmseqs.com%2Fbiocontainer.php%3Fcontainer%3Dmmseqs2)](https://biocontainers.pro/#/tools/mmseqs2)
Expand Down
24 changes: 17 additions & 7 deletions azure-pipelines.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,19 +7,29 @@ variables:
regression: 1

jobs:
- job: build_ubuntu_1804_userguide
displayName: Ubuntu 1804 Userguide
- job: build_ubuntu_2004_userguide
displayName: Ubuntu 2004 Userguide
pool:
vmImage: 'Ubuntu-18.04'
vmImage: 'Ubuntu-20.04'
steps:
- checkout: "none"
- task: Cache@2
inputs:
key: '"tectonic" | "$(Agent.OS)"'
restoreKeys: |
"tectonic" | "$(Agent.OS)"
"tectonic"
path: $(Pipeline.Workspace)/tectonic-cache/
displayName: Cache Tectonic
- script: |
sudo apt-get update
sudo apt-get -y install pandoc texlive-latex-recommended texlive-fonts-extra
wget -qO- https://github.com/tectonic-typesetting/tectonic/releases/download/tectonic%400.4.1/tectonic-0.4.1-x86_64-unknown-linux-gnu.tar.gz | tar xzvf - tectonic
wget -qO- https://github.com/jgm/pandoc/releases/download/2.11.3.2/pandoc-2.11.3.2-linux-amd64.tar.gz | tar --strip-components=2 -xzvf - pandoc-2.11.3.2/bin/pandoc
sudo mv -f pandoc tectonic /usr/local/bin
displayName: Install Dependencies
- script: |
cd ${SYSTEM_DEFAULTWORKINGDIRECTORY}
git clone https://github.com/soedinglab/MMseqs2.wiki.git .
export XDG_CACHE_HOME=${PIPELINE_WORKSPACE}/tectonic-cache/
.pandoc/make-pdf.sh
displayName: Build Userguide
- task: PublishPipelineArtifact@0
Expand Down Expand Up @@ -242,7 +252,7 @@ jobs:
pool:
vmImage: 'Ubuntu-18.04'
dependsOn:
- build_ubuntu_1804_userguide
- build_ubuntu_2004_userguide
- build_macos_1015
- build_ubuntu_1804
- build_ubuntu_cross_2004
Expand All @@ -251,7 +261,7 @@ jobs:
- script: |
cd "${BUILD_SOURCESDIRECTORY}"
mkdir mmseqs
cp -f README.md LICENCE.md mmseqs
cp -f README.md LICENSE.md mmseqs
cp -r examples mmseqs
mkdir mmseqs/matrices
cp -f data/*.out mmseqs/matrices
Expand Down
Loading

0 comments on commit 1b66171

Please sign in to comment.