diff --git a/.github/workflows/benchmarks.yml b/.github/workflows/benchmarks.yml index 04909903c..52eddfc45 100644 --- a/.github/workflows/benchmarks.yml +++ b/.github/workflows/benchmarks.yml @@ -38,6 +38,7 @@ jobs: - dolphinnpy - elasticsearch - elastiknn + - expann - faiss - flann - glass diff --git a/README.md b/README.md index dc6d42b6c..a103d80f9 100644 --- a/README.md +++ b/README.md @@ -34,6 +34,7 @@ Evaluated * [Vearch](https://github.com/vearch/vearch) ![https://img.shields.io/github/stars/vearch/vearch?style=social](https://img.shields.io/github/stars/vearch/vearch?style=social) * [Elasticsearch](https://github.com/elastic/elasticsearch) ![https://img.shields.io/github/stars/elastic/elasticsearch?style=social](https://img.shields.io/github/stars/elastic/elasticsearch?style=social): HNSW * [Elastiknn](https://github.com/alexklibisz/elastiknn) ![https://img.shields.io/github/stars/alexklibisz/elastiknn?style=social](https://img.shields.io/github/stars/alexklibisz/elastiknn?style=social) +* [ExpANN](https://github.com/jacketsj/expANN) ![https://img.shields.io/github/stars/jacketsj/expANN?style=social](https://img.shields.io/github/stars/jacketsj/expANN?style=social) * [OpenSearch KNN](https://github.com/opensearch-project/k-NN) ![https://img.shields.io/github/stars/opensearch-project/k-NN?style=social](https://img.shields.io/github/stars/opensearch-project/k-NN?style=social) * [DiskANN](https://github.com/microsoft/diskann) ![https://img.shields.io/github/stars/microsoft/diskann?style=social](https://img.shields.io/github/stars/microsoft/diskann?style=social): Vamana, Vamana-PQ * [Vespa](https://github.com/vespa-engine/vespa) ![https://img.shields.io/github/stars/vespa-engine/vespa?style=social](https://img.shields.io/github/stars/vespa-engine/vespa?style=social)
diff --git a/ann_benchmarks/algorithms/expann/Dockerfile b/ann_benchmarks/algorithms/expann/Dockerfile
new file mode 100644
index 000000000..cad633c5b
--- /dev/null
+++ b/ann_benchmarks/algorithms/expann/Dockerfile
@@ -0,0 +1,26 @@
+FROM ann-benchmarks
+
+RUN apt update
+RUN apt install -y software-properties-common
+RUN add-apt-repository -y ppa:git-core/ppa
+RUN apt update
+RUN DEBIAN_FRONTEND=noninteractive apt install -y git make cmake g++ libaio-dev libgoogle-perftools-dev libunwind-dev clang-format libboost-dev libboost-program-options-dev libmkl-full-dev libcpprest-dev python3.10 nlohmann-json3-dev libeigen3-dev valgrind
+
+# Cache-buster: this ref changes whenever the branch moves, invalidating the
+# layers below; it must track the same branch that `git clone -b` checks out.
+ADD https://api.github.com/repos/jacketsj/expANN/git/refs/heads/main version.json
+RUN git clone -b main https://github.com/jacketsj/expANN.git
+RUN cd expANN && git submodule update --init --recursive
+RUN cd expANN && pip install pybind11
+RUN cd expANN && bash build.sh
+
+ENV PYTHONPATH=$PYTHONPATH:/home/app/expANN/build
+
+WORKDIR /home/app
+
+RUN python3 -c "import expann_py"
+RUN python3 -c "import expann_py_64"
+RUN python3 -c "import expann_py_128"
+RUN python3 -c "import expann_py_256"
+RUN python3 -c "import expann_py_832"
+RUN python3 -c "import expann_py_960"
diff --git a/ann_benchmarks/algorithms/expann/config.yml b/ann_benchmarks/algorithms/expann/config.yml new file mode 100644 index 000000000..b6118c275 --- /dev/null +++ b/ann_benchmarks/algorithms/expann/config.yml @@ -0,0 +1,129 @@ +float: + any: + - base_args: ['@metric'] + constructor: ExpAnnWrapper + disabled: false + docker_tag: ann-benchmarks-expann + module: ann_benchmarks.algorithms.expann + name: expann + run_groups: + rg20_10: + arg_groups: [{M: 20, ef_construction: 500, ortho_count: 1, prune_overflow: 0, use_compression: True}] + args: {} + query_args: [[10, 20, 30, 40, 50, 80, 120]] + rg20_11: + arg_groups: [{M: 20, ef_construction: 500, ortho_count: 1, prune_overflow: 1, use_compression: True}] + args: {} + query_args: [[10, 20, 30, 40, 50, 80, 120]] + rg20_13: + arg_groups: [{M: 20, ef_construction: 500, ortho_count: 1, prune_overflow: 3, use_compression: True}] + args: {} + 
query_args: [[10, 20, 30, 40, 50, 80, 120]] + rg20_00: + arg_groups: [{M: 20, ef_construction: 500, ortho_count: 1, prune_overflow: 0, use_compression: False}] + args: {} + query_args: [[10, 20, 30, 40, 50, 80, 120]] + rg20_01: + arg_groups: [{M: 20, ef_construction: 500, ortho_count: 1, prune_overflow: 1, use_compression: False}] + args: {} + query_args: [[10, 20, 30, 40, 50, 80, 120]] + rg20_03: + arg_groups: [{M: 20, ef_construction: 500, ortho_count: 1, prune_overflow: 3, use_compression: False}] + args: {} + query_args: [[10, 20, 30, 40, 50, 80, 120]] + rg40_10: + arg_groups: [{M: 40, ef_construction: 500, ortho_count: 1, prune_overflow: 0, use_compression: True}] + args: {} + query_args: [[10, 20, 30, 40, 50, 80, 120]] + rg40_11: + arg_groups: [{M: 40, ef_construction: 500, ortho_count: 1, prune_overflow: 1, use_compression: True}] + args: {} + query_args: [[10, 20, 30, 40, 50, 80, 120]] + rg40_13: + arg_groups: [{M: 40, ef_construction: 500, ortho_count: 1, prune_overflow: 3, use_compression: True}] + args: {} + query_args: [[10, 20, 30, 40, 50, 80, 120]] + rg40_00: + arg_groups: [{M: 40, ef_construction: 500, ortho_count: 1, prune_overflow: 0, use_compression: False}] + args: {} + query_args: [[10, 20, 30, 40, 50, 80, 120]] + rg40_01: + arg_groups: [{M: 40, ef_construction: 500, ortho_count: 1, prune_overflow: 1, use_compression: False}] + args: {} + query_args: [[10, 20, 30, 40, 50, 80, 120]] + rg40_03: + arg_groups: [{M: 40, ef_construction: 500, ortho_count: 1, prune_overflow: 3, use_compression: False}] + args: {} + query_args: [[10, 20, 30, 40, 50, 80, 120]] + rg60_10: + arg_groups: [{M: 60, ef_construction: 500, ortho_count: 1, prune_overflow: 0, use_compression: True}] + args: {} + query_args: [[10, 20, 30, 40, 50, 80, 120]] + rg60_11: + arg_groups: [{M: 60, ef_construction: 500, ortho_count: 1, prune_overflow: 1, use_compression: True}] + args: {} + query_args: [[10, 20, 30, 40, 50, 80, 120]] + rg60_13: + arg_groups: [{M: 60, ef_construction: 500, 
ortho_count: 1, prune_overflow: 3, use_compression: True}] + args: {} + query_args: [[10, 20, 30, 40, 50, 80, 120]] + rg60_00: + arg_groups: [{M: 60, ef_construction: 500, ortho_count: 1, prune_overflow: 0, use_compression: False}] + args: {} + query_args: [[10, 20, 30, 40, 50, 80, 120]] + rg60_01: + arg_groups: [{M: 60, ef_construction: 500, ortho_count: 1, prune_overflow: 1, use_compression: False}] + args: {} + query_args: [[10, 20, 30, 40, 50, 80, 120]] + rg60_03: + arg_groups: [{M: 60, ef_construction: 500, ortho_count: 1, prune_overflow: 3, use_compression: False}] + args: {} + query_args: [[10, 20, 30, 40, 50, 80, 120]] + rg80_10: + arg_groups: [{M: 80, ef_construction: 500, ortho_count: 1, prune_overflow: 0, use_compression: True}] + args: {} + query_args: [[10, 20, 30, 40, 50, 80, 120]] + rg80_11: + arg_groups: [{M: 80, ef_construction: 500, ortho_count: 1, prune_overflow: 1, use_compression: True}] + args: {} + query_args: [[10, 20, 30, 40, 50, 80, 120]] + rg80_13: + arg_groups: [{M: 80, ef_construction: 500, ortho_count: 1, prune_overflow: 3, use_compression: True}] + args: {} + query_args: [[10, 20, 30, 40, 50, 80, 120]] + rg80_00: + arg_groups: [{M: 80, ef_construction: 500, ortho_count: 1, prune_overflow: 0, use_compression: False}] + args: {} + query_args: [[10, 20, 30, 40, 50, 80, 120]] + rg80_01: + arg_groups: [{M: 80, ef_construction: 500, ortho_count: 1, prune_overflow: 1, use_compression: False}] + args: {} + query_args: [[10, 20, 30, 40, 50, 80, 120]] + rg80_03: + arg_groups: [{M: 80, ef_construction: 500, ortho_count: 1, prune_overflow: 3, use_compression: False}] + args: {} + query_args: [[10, 20, 30, 40, 50, 80, 120]] + rg150_10: + arg_groups: [{M: 150, ef_construction: 500, ortho_count: 1, prune_overflow: 0, use_compression: True}] + args: {} + query_args: [[10, 20, 30, 40, 50, 80, 120]] + rg150_11: + arg_groups: [{M: 150, ef_construction: 500, ortho_count: 1, prune_overflow: 1, use_compression: True}] + args: {} + query_args: [[10, 20, 30, 
40, 50, 80, 120]] + rg150_13: + arg_groups: [{M: 150, ef_construction: 500, ortho_count: 1, prune_overflow: 3, use_compression: True}] + args: {} + query_args: [[10, 20, 30, 40, 50, 80, 120]] + rg150_00: + arg_groups: [{M: 150, ef_construction: 500, ortho_count: 1, prune_overflow: 0, use_compression: False}] + args: {} + query_args: [[10, 20, 30, 40, 50, 80, 120]] + rg150_01: + arg_groups: [{M: 150, ef_construction: 500, ortho_count: 1, prune_overflow: 1, use_compression: False}] + args: {} + query_args: [[10, 20, 30, 40, 50, 80, 120]] + rg150_03: + arg_groups: [{M: 150, ef_construction: 500, ortho_count: 1, prune_overflow: 3, use_compression: False}] + args: {} + query_args: [[10, 20, 30, 40, 50, 80, 120]]
diff --git a/ann_benchmarks/algorithms/expann/module.py b/ann_benchmarks/algorithms/expann/module.py
new file mode 100644
index 000000000..ca4c574f9
--- /dev/null
+++ b/ann_benchmarks/algorithms/expann/module.py
@@ -0,0 +1,69 @@
+import numpy as np
+import expann_py as ep_nodim
+import expann_py_64
+import expann_py_128
+import expann_py_256
+import expann_py_832
+import expann_py_960
+
+from ..base.module import BaseANN
+
+class ExpAnnWrapper(BaseANN):
+    """Adapter exposing expANN's ``AntitopoEngine`` through ``BaseANN``."""
+
+    def __init__(self, metric, index_param):
+        """Store the build parameters; the engine is created later in ``fit``.
+
+        ``index_param`` must contain the keys ``M``, ``ef_construction``,
+        ``ortho_count``, ``prune_overflow`` and ``use_compression``.
+        """
+        self._m = index_param["M"]
+        self._ef_construction = index_param["ef_construction"]
+        self._ortho_count = index_param["ortho_count"]
+        self._prune_overflow = index_param["prune_overflow"]
+        self._use_compression = index_param["use_compression"]
+        self.name = "expANN Anti-Topo Engine"
+        self.res = None
+        self.metric = metric
+        # Bindings keyed by the dimension embedded in each module's name.
+        self.modules = {
+            64: expann_py_64,
+            128: expann_py_128,
+            256: expann_py_256,
+            832: expann_py_832,
+            960: expann_py_960,
+        }
+
+    def get_module_for_dim(self, d):
+        """Return ``(dim, module)`` for the smallest registered ``dim >= d``.
+
+        When ``d`` exceeds every registered dimension, return ``(d, ep_nodim)``.
+        """
+        for dim in sorted(self.modules):
+            if d <= dim:
+                return dim, self.modules[dim]
+        return d, ep_nodim
+
+    def fit(self, X):
+        """Pick the bindings for ``X.shape[1]``, load ``X`` and build the index."""
+        self.dim_unpadded = X.shape[1]
+        self.dim_padded, self.epy = self.get_module_for_dim(self.dim_unpadded)
+        print("Got padded dim:", self.dim_padded)
+        self.engine = self.epy.AntitopoEngine(
+            self._m,
+            self._ef_construction,
+            self._ortho_count,
+            self._prune_overflow,
+            self._use_compression,
+        )
+        # The second argument is True only when the metric is "angular".
+        self.engine.store_many_vectors(X, self.metric == "angular")
+        self.engine.build()
+
+    def query(self, q, k):
+        """Return ``self.engine.query_k_numpy(q, k)`` unchanged."""
+        return self.engine.query_k_numpy(q, k)
+
+    def set_query_arguments(self, ef_search):
+        """Forward ``ef_search`` to the engine's ``set_ef_search``."""
+        self.engine.set_ef_search(ef_search)