From 102fe7bd3bd415d9d456e4ddee5b0ad5a7299f25 Mon Sep 17 00:00:00 2001 From: alamuri Date: Thu, 12 Sep 2024 18:03:39 -0400 Subject: [PATCH] added yaml files for parquet --- ....bge-base-en-v1.5.parquet.flat.cached.yaml | 53 +++++++++++++++++++ ...na.bge-base-en-v1.5.parquet.flat.onnx.yaml | 53 +++++++++++++++++++ ....bge-base-en-v1.5.parquet.hnsw.cached.yaml | 53 +++++++++++++++++++ ...na.bge-base-en-v1.5.parquet.hnsw.onnx.yaml | 53 +++++++++++++++++++ ....bge-base-en-v1.5.parquet.flat.cached.yaml | 53 +++++++++++++++++++ ...sq.bge-base-en-v1.5.parquet.flat.onnx.yaml | 53 +++++++++++++++++++ ....bge-base-en-v1.5.parquet.hnsw.cached.yaml | 53 +++++++++++++++++++ ...sq.bge-base-en-v1.5.parquet.hnsw.onnx.yaml | 53 +++++++++++++++++++ ....bge-base-en-v1.5.parquet.flat.cached.yaml | 53 +++++++++++++++++++ ...er.bge-base-en-v1.5.parquet.flat.onnx.yaml | 53 +++++++++++++++++++ ....bge-base-en-v1.5.parquet.hnsw.cached.yaml | 53 +++++++++++++++++++ ...er.bge-base-en-v1.5.parquet.hnsw.onnx.yaml | 53 +++++++++++++++++++ ....bge-base-en-v1.5.parquet.flat.cached.yaml | 53 +++++++++++++++++++ ...id.bge-base-en-v1.5.parquet.flat.onnx.yaml | 53 +++++++++++++++++++ ....bge-base-en-v1.5.parquet.hnsw.cached.yaml | 53 +++++++++++++++++++ ...id.bge-base-en-v1.5.parquet.hnsw.onnx.yaml | 53 +++++++++++++++++++ ....bge-base-en-v1.5.parquet.flat.cached.yaml | 53 +++++++++++++++++++ ...sh.bge-base-en-v1.5.parquet.flat.onnx.yaml | 53 +++++++++++++++++++ ....bge-base-en-v1.5.parquet.hnsw.cached.yaml | 53 +++++++++++++++++++ ...sh.bge-base-en-v1.5.parquet.hnsw.onnx.yaml | 53 +++++++++++++++++++ ....bge-base-en-v1.5.parquet.flat.cached.yaml | 53 +++++++++++++++++++ ...ng.bge-base-en-v1.5.parquet.flat.onnx.yaml | 53 +++++++++++++++++++ ....bge-base-en-v1.5.parquet.hnsw.cached.yaml | 53 +++++++++++++++++++ ...ng.bge-base-en-v1.5.parquet.hnsw.onnx.yaml | 53 +++++++++++++++++++ ....bge-base-en-v1.5.parquet.flat.cached.yaml | 53 +++++++++++++++++++ ...is.bge-base-en-v1.5.parquet.flat.onnx.yaml | 53 
+++++++++++++++++++ ....bge-base-en-v1.5.parquet.hnsw.cached.yaml | 53 +++++++++++++++++++ ...is.bge-base-en-v1.5.parquet.hnsw.onnx.yaml | 53 +++++++++++++++++++ ....bge-base-en-v1.5.parquet.flat.cached.yaml | 53 +++++++++++++++++++ ...ca.bge-base-en-v1.5.parquet.flat.onnx.yaml | 53 +++++++++++++++++++ ....bge-base-en-v1.5.parquet.hnsw.cached.yaml | 53 +++++++++++++++++++ ...ca.bge-base-en-v1.5.parquet.hnsw.onnx.yaml | 53 +++++++++++++++++++ ....bge-base-en-v1.5.parquet.flat.cached.yaml | 53 +++++++++++++++++++ ...cs.bge-base-en-v1.5.parquet.flat.onnx.yaml | 53 +++++++++++++++++++ ....bge-base-en-v1.5.parquet.hnsw.cached.yaml | 53 +++++++++++++++++++ ...cs.bge-base-en-v1.5.parquet.hnsw.onnx.yaml | 53 +++++++++++++++++++ ....bge-base-en-v1.5.parquet.flat.cached.yaml | 53 +++++++++++++++++++ ...rs.bge-base-en-v1.5.parquet.flat.onnx.yaml | 53 +++++++++++++++++++ ....bge-base-en-v1.5.parquet.hnsw.cached.yaml | 53 +++++++++++++++++++ ...rs.bge-base-en-v1.5.parquet.hnsw.onnx.yaml | 53 +++++++++++++++++++ ....bge-base-en-v1.5.parquet.flat.cached.yaml | 53 +++++++++++++++++++ ...ts.bge-base-en-v1.5.parquet.flat.onnx.yaml | 53 +++++++++++++++++++ ....bge-base-en-v1.5.parquet.hnsw.cached.yaml | 53 +++++++++++++++++++ ...ts.bge-base-en-v1.5.parquet.hnsw.onnx.yaml | 53 +++++++++++++++++++ ....bge-base-en-v1.5.parquet.flat.cached.yaml | 53 +++++++++++++++++++ ...ex.bge-base-en-v1.5.parquet.flat.onnx.yaml | 53 +++++++++++++++++++ ....bge-base-en-v1.5.parquet.hnsw.cached.yaml | 53 +++++++++++++++++++ ...ex.bge-base-en-v1.5.parquet.hnsw.onnx.yaml | 53 +++++++++++++++++++ ....bge-base-en-v1.5.parquet.flat.cached.yaml | 53 +++++++++++++++++++ ...ix.bge-base-en-v1.5.parquet.flat.onnx.yaml | 53 +++++++++++++++++++ ....bge-base-en-v1.5.parquet.hnsw.cached.yaml | 53 +++++++++++++++++++ ...ix.bge-base-en-v1.5.parquet.hnsw.onnx.yaml | 53 +++++++++++++++++++ ....bge-base-en-v1.5.parquet.flat.cached.yaml | 53 +++++++++++++++++++ ...rs.bge-base-en-v1.5.parquet.flat.onnx.yaml | 53 
+++++++++++++++++++ ....bge-base-en-v1.5.parquet.hnsw.cached.yaml | 53 +++++++++++++++++++ ...rs.bge-base-en-v1.5.parquet.hnsw.onnx.yaml | 53 +++++++++++++++++++ ....bge-base-en-v1.5.parquet.flat.cached.yaml | 53 +++++++++++++++++++ ...ss.bge-base-en-v1.5.parquet.flat.onnx.yaml | 53 +++++++++++++++++++ ....bge-base-en-v1.5.parquet.hnsw.cached.yaml | 53 +++++++++++++++++++ ...ss.bge-base-en-v1.5.parquet.hnsw.onnx.yaml | 53 +++++++++++++++++++ ....bge-base-en-v1.5.parquet.flat.cached.yaml | 53 +++++++++++++++++++ ...ty.bge-base-en-v1.5.parquet.flat.onnx.yaml | 53 +++++++++++++++++++ ....bge-base-en-v1.5.parquet.hnsw.cached.yaml | 53 +++++++++++++++++++ ...ty.bge-base-en-v1.5.parquet.hnsw.onnx.yaml | 53 +++++++++++++++++++ ....bge-base-en-v1.5.parquet.flat.cached.yaml | 53 +++++++++++++++++++ ...er.bge-base-en-v1.5.parquet.flat.onnx.yaml | 53 +++++++++++++++++++ ....bge-base-en-v1.5.parquet.hnsw.cached.yaml | 53 +++++++++++++++++++ ...er.bge-base-en-v1.5.parquet.hnsw.onnx.yaml | 53 +++++++++++++++++++ ....bge-base-en-v1.5.parquet.flat.cached.yaml | 53 +++++++++++++++++++ ...qa.bge-base-en-v1.5.parquet.flat.onnx.yaml | 53 +++++++++++++++++++ ....bge-base-en-v1.5.parquet.hnsw.cached.yaml | 53 +++++++++++++++++++ ...qa.bge-base-en-v1.5.parquet.hnsw.onnx.yaml | 53 +++++++++++++++++++ ....bge-base-en-v1.5.parquet.flat.cached.yaml | 53 +++++++++++++++++++ ...qa.bge-base-en-v1.5.parquet.flat.onnx.yaml | 53 +++++++++++++++++++ ....bge-base-en-v1.5.parquet.hnsw.cached.yaml | 53 +++++++++++++++++++ ...qa.bge-base-en-v1.5.parquet.hnsw.onnx.yaml | 53 +++++++++++++++++++ ....bge-base-en-v1.5.parquet.flat.cached.yaml | 53 +++++++++++++++++++ ...us.bge-base-en-v1.5.parquet.flat.onnx.yaml | 53 +++++++++++++++++++ ....bge-base-en-v1.5.parquet.hnsw.cached.yaml | 53 +++++++++++++++++++ ...us.bge-base-en-v1.5.parquet.hnsw.onnx.yaml | 53 +++++++++++++++++++ ....bge-base-en-v1.5.parquet.flat.cached.yaml | 53 +++++++++++++++++++ ...nq.bge-base-en-v1.5.parquet.flat.onnx.yaml | 53 
+++++++++++++++++++ ....bge-base-en-v1.5.parquet.hnsw.cached.yaml | 53 +++++++++++++++++++ ...nq.bge-base-en-v1.5.parquet.hnsw.onnx.yaml | 53 +++++++++++++++++++ ....bge-base-en-v1.5.parquet.flat.cached.yaml | 53 +++++++++++++++++++ ...ra.bge-base-en-v1.5.parquet.flat.onnx.yaml | 53 +++++++++++++++++++ ....bge-base-en-v1.5.parquet.hnsw.cached.yaml | 53 +++++++++++++++++++ ...ra.bge-base-en-v1.5.parquet.hnsw.onnx.yaml | 53 +++++++++++++++++++ ....bge-base-en-v1.5.parquet.flat.cached.yaml | 53 +++++++++++++++++++ ...04.bge-base-en-v1.5.parquet.flat.onnx.yaml | 53 +++++++++++++++++++ ....bge-base-en-v1.5.parquet.hnsw.cached.yaml | 53 +++++++++++++++++++ ...04.bge-base-en-v1.5.parquet.hnsw.onnx.yaml | 53 +++++++++++++++++++ ....bge-base-en-v1.5.parquet.flat.cached.yaml | 53 +++++++++++++++++++ ...cs.bge-base-en-v1.5.parquet.flat.onnx.yaml | 53 +++++++++++++++++++ ....bge-base-en-v1.5.parquet.hnsw.cached.yaml | 53 +++++++++++++++++++ ...cs.bge-base-en-v1.5.parquet.hnsw.onnx.yaml | 53 +++++++++++++++++++ ....bge-base-en-v1.5.parquet.flat.cached.yaml | 53 +++++++++++++++++++ ...ct.bge-base-en-v1.5.parquet.flat.onnx.yaml | 53 +++++++++++++++++++ ....bge-base-en-v1.5.parquet.hnsw.cached.yaml | 53 +++++++++++++++++++ ...ct.bge-base-en-v1.5.parquet.hnsw.onnx.yaml | 53 +++++++++++++++++++ ....bge-base-en-v1.5.parquet.flat.cached.yaml | 53 +++++++++++++++++++ ...1m.bge-base-en-v1.5.parquet.flat.onnx.yaml | 53 +++++++++++++++++++ ....bge-base-en-v1.5.parquet.hnsw.cached.yaml | 53 +++++++++++++++++++ ...1m.bge-base-en-v1.5.parquet.hnsw.onnx.yaml | 53 +++++++++++++++++++ ....bge-base-en-v1.5.parquet.flat.cached.yaml | 53 +++++++++++++++++++ ...id.bge-base-en-v1.5.parquet.flat.onnx.yaml | 53 +++++++++++++++++++ ....bge-base-en-v1.5.parquet.hnsw.cached.yaml | 53 +++++++++++++++++++ ...id.bge-base-en-v1.5.parquet.hnsw.onnx.yaml | 53 +++++++++++++++++++ ....bge-base-en-v1.5.parquet.flat.cached.yaml | 53 +++++++++++++++++++ ...ws.bge-base-en-v1.5.parquet.flat.onnx.yaml | 53 
+++++++++++++++++++ ....bge-base-en-v1.5.parquet.hnsw.cached.yaml | 53 +++++++++++++++++++ ...ws.bge-base-en-v1.5.parquet.hnsw.onnx.yaml | 53 +++++++++++++++++++ ....bge-base-en-v1.5.parquet.flat.cached.yaml | 53 +++++++++++++++++++ ...20.bge-base-en-v1.5.parquet.flat.onnx.yaml | 53 +++++++++++++++++++ ....bge-base-en-v1.5.parquet.hnsw.cached.yaml | 53 +++++++++++++++++++ ...20.bge-base-en-v1.5.parquet.hnsw.onnx.yaml | 53 +++++++++++++++++++ 116 files changed, 6148 insertions(+) create mode 100644 src/main/resources/regression/beir-v1.0.0-arguana.bge-base-en-v1.5.parquet.flat.cached.yaml create mode 100644 src/main/resources/regression/beir-v1.0.0-arguana.bge-base-en-v1.5.parquet.flat.onnx.yaml create mode 100644 src/main/resources/regression/beir-v1.0.0-arguana.bge-base-en-v1.5.parquet.hnsw.cached.yaml create mode 100644 src/main/resources/regression/beir-v1.0.0-arguana.bge-base-en-v1.5.parquet.hnsw.onnx.yaml create mode 100644 src/main/resources/regression/beir-v1.0.0-bioasq.bge-base-en-v1.5.parquet.flat.cached.yaml create mode 100644 src/main/resources/regression/beir-v1.0.0-bioasq.bge-base-en-v1.5.parquet.flat.onnx.yaml create mode 100644 src/main/resources/regression/beir-v1.0.0-bioasq.bge-base-en-v1.5.parquet.hnsw.cached.yaml create mode 100644 src/main/resources/regression/beir-v1.0.0-bioasq.bge-base-en-v1.5.parquet.hnsw.onnx.yaml create mode 100644 src/main/resources/regression/beir-v1.0.0-climate-fever.bge-base-en-v1.5.parquet.flat.cached.yaml create mode 100644 src/main/resources/regression/beir-v1.0.0-climate-fever.bge-base-en-v1.5.parquet.flat.onnx.yaml create mode 100644 src/main/resources/regression/beir-v1.0.0-climate-fever.bge-base-en-v1.5.parquet.hnsw.cached.yaml create mode 100644 src/main/resources/regression/beir-v1.0.0-climate-fever.bge-base-en-v1.5.parquet.hnsw.onnx.yaml create mode 100644 src/main/resources/regression/beir-v1.0.0-cqadupstack-android.bge-base-en-v1.5.parquet.flat.cached.yaml create mode 100644 
src/main/resources/regression/beir-v1.0.0-cqadupstack-android.bge-base-en-v1.5.parquet.flat.onnx.yaml create mode 100644 src/main/resources/regression/beir-v1.0.0-cqadupstack-android.bge-base-en-v1.5.parquet.hnsw.cached.yaml create mode 100644 src/main/resources/regression/beir-v1.0.0-cqadupstack-android.bge-base-en-v1.5.parquet.hnsw.onnx.yaml create mode 100644 src/main/resources/regression/beir-v1.0.0-cqadupstack-english.bge-base-en-v1.5.parquet.flat.cached.yaml create mode 100644 src/main/resources/regression/beir-v1.0.0-cqadupstack-english.bge-base-en-v1.5.parquet.flat.onnx.yaml create mode 100644 src/main/resources/regression/beir-v1.0.0-cqadupstack-english.bge-base-en-v1.5.parquet.hnsw.cached.yaml create mode 100644 src/main/resources/regression/beir-v1.0.0-cqadupstack-english.bge-base-en-v1.5.parquet.hnsw.onnx.yaml create mode 100644 src/main/resources/regression/beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5.parquet.flat.cached.yaml create mode 100644 src/main/resources/regression/beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5.parquet.flat.onnx.yaml create mode 100644 src/main/resources/regression/beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5.parquet.hnsw.cached.yaml create mode 100644 src/main/resources/regression/beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5.parquet.hnsw.onnx.yaml create mode 100644 src/main/resources/regression/beir-v1.0.0-cqadupstack-gis.bge-base-en-v1.5.parquet.flat.cached.yaml create mode 100644 src/main/resources/regression/beir-v1.0.0-cqadupstack-gis.bge-base-en-v1.5.parquet.flat.onnx.yaml create mode 100644 src/main/resources/regression/beir-v1.0.0-cqadupstack-gis.bge-base-en-v1.5.parquet.hnsw.cached.yaml create mode 100644 src/main/resources/regression/beir-v1.0.0-cqadupstack-gis.bge-base-en-v1.5.parquet.hnsw.onnx.yaml create mode 100644 src/main/resources/regression/beir-v1.0.0-cqadupstack-mathematica.bge-base-en-v1.5.parquet.flat.cached.yaml create mode 100644 
src/main/resources/regression/beir-v1.0.0-cqadupstack-mathematica.bge-base-en-v1.5.parquet.flat.onnx.yaml create mode 100644 src/main/resources/regression/beir-v1.0.0-cqadupstack-mathematica.bge-base-en-v1.5.parquet.hnsw.cached.yaml create mode 100644 src/main/resources/regression/beir-v1.0.0-cqadupstack-mathematica.bge-base-en-v1.5.parquet.hnsw.onnx.yaml create mode 100644 src/main/resources/regression/beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5.parquet.flat.cached.yaml create mode 100644 src/main/resources/regression/beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5.parquet.flat.onnx.yaml create mode 100644 src/main/resources/regression/beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5.parquet.hnsw.cached.yaml create mode 100644 src/main/resources/regression/beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5.parquet.hnsw.onnx.yaml create mode 100644 src/main/resources/regression/beir-v1.0.0-cqadupstack-programmers.bge-base-en-v1.5.parquet.flat.cached.yaml create mode 100644 src/main/resources/regression/beir-v1.0.0-cqadupstack-programmers.bge-base-en-v1.5.parquet.flat.onnx.yaml create mode 100644 src/main/resources/regression/beir-v1.0.0-cqadupstack-programmers.bge-base-en-v1.5.parquet.hnsw.cached.yaml create mode 100644 src/main/resources/regression/beir-v1.0.0-cqadupstack-programmers.bge-base-en-v1.5.parquet.hnsw.onnx.yaml create mode 100644 src/main/resources/regression/beir-v1.0.0-cqadupstack-stats.bge-base-en-v1.5.parquet.flat.cached.yaml create mode 100644 src/main/resources/regression/beir-v1.0.0-cqadupstack-stats.bge-base-en-v1.5.parquet.flat.onnx.yaml create mode 100644 src/main/resources/regression/beir-v1.0.0-cqadupstack-stats.bge-base-en-v1.5.parquet.hnsw.cached.yaml create mode 100644 src/main/resources/regression/beir-v1.0.0-cqadupstack-stats.bge-base-en-v1.5.parquet.hnsw.onnx.yaml create mode 100644 src/main/resources/regression/beir-v1.0.0-cqadupstack-tex.bge-base-en-v1.5.parquet.flat.cached.yaml create mode 100644 
src/main/resources/regression/beir-v1.0.0-cqadupstack-tex.bge-base-en-v1.5.parquet.flat.onnx.yaml create mode 100644 src/main/resources/regression/beir-v1.0.0-cqadupstack-tex.bge-base-en-v1.5.parquet.hnsw.cached.yaml create mode 100644 src/main/resources/regression/beir-v1.0.0-cqadupstack-tex.bge-base-en-v1.5.parquet.hnsw.onnx.yaml create mode 100644 src/main/resources/regression/beir-v1.0.0-cqadupstack-unix.bge-base-en-v1.5.parquet.flat.cached.yaml create mode 100644 src/main/resources/regression/beir-v1.0.0-cqadupstack-unix.bge-base-en-v1.5.parquet.flat.onnx.yaml create mode 100644 src/main/resources/regression/beir-v1.0.0-cqadupstack-unix.bge-base-en-v1.5.parquet.hnsw.cached.yaml create mode 100644 src/main/resources/regression/beir-v1.0.0-cqadupstack-unix.bge-base-en-v1.5.parquet.hnsw.onnx.yaml create mode 100644 src/main/resources/regression/beir-v1.0.0-cqadupstack-webmasters.bge-base-en-v1.5.parquet.flat.cached.yaml create mode 100644 src/main/resources/regression/beir-v1.0.0-cqadupstack-webmasters.bge-base-en-v1.5.parquet.flat.onnx.yaml create mode 100644 src/main/resources/regression/beir-v1.0.0-cqadupstack-webmasters.bge-base-en-v1.5.parquet.hnsw.cached.yaml create mode 100644 src/main/resources/regression/beir-v1.0.0-cqadupstack-webmasters.bge-base-en-v1.5.parquet.hnsw.onnx.yaml create mode 100644 src/main/resources/regression/beir-v1.0.0-cqadupstack-wordpress.bge-base-en-v1.5.parquet.flat.cached.yaml create mode 100644 src/main/resources/regression/beir-v1.0.0-cqadupstack-wordpress.bge-base-en-v1.5.parquet.flat.onnx.yaml create mode 100644 src/main/resources/regression/beir-v1.0.0-cqadupstack-wordpress.bge-base-en-v1.5.parquet.hnsw.cached.yaml create mode 100644 src/main/resources/regression/beir-v1.0.0-cqadupstack-wordpress.bge-base-en-v1.5.parquet.hnsw.onnx.yaml create mode 100644 src/main/resources/regression/beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5.parquet.flat.cached.yaml create mode 100644 
src/main/resources/regression/beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5.parquet.flat.onnx.yaml create mode 100644 src/main/resources/regression/beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5.parquet.hnsw.cached.yaml create mode 100644 src/main/resources/regression/beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5.parquet.hnsw.onnx.yaml create mode 100644 src/main/resources/regression/beir-v1.0.0-fever.bge-base-en-v1.5.parquet.flat.cached.yaml create mode 100644 src/main/resources/regression/beir-v1.0.0-fever.bge-base-en-v1.5.parquet.flat.onnx.yaml create mode 100644 src/main/resources/regression/beir-v1.0.0-fever.bge-base-en-v1.5.parquet.hnsw.cached.yaml create mode 100644 src/main/resources/regression/beir-v1.0.0-fever.bge-base-en-v1.5.parquet.hnsw.onnx.yaml create mode 100644 src/main/resources/regression/beir-v1.0.0-fiqa.bge-base-en-v1.5.parquet.flat.cached.yaml create mode 100644 src/main/resources/regression/beir-v1.0.0-fiqa.bge-base-en-v1.5.parquet.flat.onnx.yaml create mode 100644 src/main/resources/regression/beir-v1.0.0-fiqa.bge-base-en-v1.5.parquet.hnsw.cached.yaml create mode 100644 src/main/resources/regression/beir-v1.0.0-fiqa.bge-base-en-v1.5.parquet.hnsw.onnx.yaml create mode 100644 src/main/resources/regression/beir-v1.0.0-hotpotqa.bge-base-en-v1.5.parquet.flat.cached.yaml create mode 100644 src/main/resources/regression/beir-v1.0.0-hotpotqa.bge-base-en-v1.5.parquet.flat.onnx.yaml create mode 100644 src/main/resources/regression/beir-v1.0.0-hotpotqa.bge-base-en-v1.5.parquet.hnsw.cached.yaml create mode 100644 src/main/resources/regression/beir-v1.0.0-hotpotqa.bge-base-en-v1.5.parquet.hnsw.onnx.yaml create mode 100644 src/main/resources/regression/beir-v1.0.0-nfcorpus.bge-base-en-v1.5.parquet.flat.cached.yaml create mode 100644 src/main/resources/regression/beir-v1.0.0-nfcorpus.bge-base-en-v1.5.parquet.flat.onnx.yaml create mode 100644 src/main/resources/regression/beir-v1.0.0-nfcorpus.bge-base-en-v1.5.parquet.hnsw.cached.yaml create mode 100644 
src/main/resources/regression/beir-v1.0.0-nfcorpus.bge-base-en-v1.5.parquet.hnsw.onnx.yaml create mode 100644 src/main/resources/regression/beir-v1.0.0-nq.bge-base-en-v1.5.parquet.flat.cached.yaml create mode 100644 src/main/resources/regression/beir-v1.0.0-nq.bge-base-en-v1.5.parquet.flat.onnx.yaml create mode 100644 src/main/resources/regression/beir-v1.0.0-nq.bge-base-en-v1.5.parquet.hnsw.cached.yaml create mode 100644 src/main/resources/regression/beir-v1.0.0-nq.bge-base-en-v1.5.parquet.hnsw.onnx.yaml create mode 100644 src/main/resources/regression/beir-v1.0.0-quora.bge-base-en-v1.5.parquet.flat.cached.yaml create mode 100644 src/main/resources/regression/beir-v1.0.0-quora.bge-base-en-v1.5.parquet.flat.onnx.yaml create mode 100644 src/main/resources/regression/beir-v1.0.0-quora.bge-base-en-v1.5.parquet.hnsw.cached.yaml create mode 100644 src/main/resources/regression/beir-v1.0.0-quora.bge-base-en-v1.5.parquet.hnsw.onnx.yaml create mode 100644 src/main/resources/regression/beir-v1.0.0-robust04.bge-base-en-v1.5.parquet.flat.cached.yaml create mode 100644 src/main/resources/regression/beir-v1.0.0-robust04.bge-base-en-v1.5.parquet.flat.onnx.yaml create mode 100644 src/main/resources/regression/beir-v1.0.0-robust04.bge-base-en-v1.5.parquet.hnsw.cached.yaml create mode 100644 src/main/resources/regression/beir-v1.0.0-robust04.bge-base-en-v1.5.parquet.hnsw.onnx.yaml create mode 100644 src/main/resources/regression/beir-v1.0.0-scidocs.bge-base-en-v1.5.parquet.flat.cached.yaml create mode 100644 src/main/resources/regression/beir-v1.0.0-scidocs.bge-base-en-v1.5.parquet.flat.onnx.yaml create mode 100644 src/main/resources/regression/beir-v1.0.0-scidocs.bge-base-en-v1.5.parquet.hnsw.cached.yaml create mode 100644 src/main/resources/regression/beir-v1.0.0-scidocs.bge-base-en-v1.5.parquet.hnsw.onnx.yaml create mode 100644 src/main/resources/regression/beir-v1.0.0-scifact.bge-base-en-v1.5.parquet.flat.cached.yaml create mode 100644 
src/main/resources/regression/beir-v1.0.0-scifact.bge-base-en-v1.5.parquet.flat.onnx.yaml create mode 100644 src/main/resources/regression/beir-v1.0.0-scifact.bge-base-en-v1.5.parquet.hnsw.cached.yaml create mode 100644 src/main/resources/regression/beir-v1.0.0-scifact.bge-base-en-v1.5.parquet.hnsw.onnx.yaml create mode 100644 src/main/resources/regression/beir-v1.0.0-signal1m.bge-base-en-v1.5.parquet.flat.cached.yaml create mode 100644 src/main/resources/regression/beir-v1.0.0-signal1m.bge-base-en-v1.5.parquet.flat.onnx.yaml create mode 100644 src/main/resources/regression/beir-v1.0.0-signal1m.bge-base-en-v1.5.parquet.hnsw.cached.yaml create mode 100644 src/main/resources/regression/beir-v1.0.0-signal1m.bge-base-en-v1.5.parquet.hnsw.onnx.yaml create mode 100644 src/main/resources/regression/beir-v1.0.0-trec-covid.bge-base-en-v1.5.parquet.flat.cached.yaml create mode 100644 src/main/resources/regression/beir-v1.0.0-trec-covid.bge-base-en-v1.5.parquet.flat.onnx.yaml create mode 100644 src/main/resources/regression/beir-v1.0.0-trec-covid.bge-base-en-v1.5.parquet.hnsw.cached.yaml create mode 100644 src/main/resources/regression/beir-v1.0.0-trec-covid.bge-base-en-v1.5.parquet.hnsw.onnx.yaml create mode 100644 src/main/resources/regression/beir-v1.0.0-trec-news.bge-base-en-v1.5.parquet.flat.cached.yaml create mode 100644 src/main/resources/regression/beir-v1.0.0-trec-news.bge-base-en-v1.5.parquet.flat.onnx.yaml create mode 100644 src/main/resources/regression/beir-v1.0.0-trec-news.bge-base-en-v1.5.parquet.hnsw.cached.yaml create mode 100644 src/main/resources/regression/beir-v1.0.0-trec-news.bge-base-en-v1.5.parquet.hnsw.onnx.yaml create mode 100644 src/main/resources/regression/beir-v1.0.0-webis-touche2020.bge-base-en-v1.5.parquet.flat.cached.yaml create mode 100644 src/main/resources/regression/beir-v1.0.0-webis-touche2020.bge-base-en-v1.5.parquet.flat.onnx.yaml create mode 100644 
src/main/resources/regression/beir-v1.0.0-webis-touche2020.bge-base-en-v1.5.parquet.hnsw.cached.yaml create mode 100644 src/main/resources/regression/beir-v1.0.0-webis-touche2020.bge-base-en-v1.5.parquet.hnsw.onnx.yaml diff --git a/src/main/resources/regression/beir-v1.0.0-arguana.bge-base-en-v1.5.parquet.flat.cached.yaml b/src/main/resources/regression/beir-v1.0.0-arguana.bge-base-en-v1.5.parquet.flat.cached.yaml new file mode 100644 index 000000000..54905ab44 --- /dev/null +++ b/src/main/resources/regression/beir-v1.0.0-arguana.bge-base-en-v1.5.parquet.flat.cached.yaml @@ -0,0 +1,53 @@ +corpus: beir-v1.0.0-arguana.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/arguana.parquet + +index_path: indexes/parquet/arguana +index_type: flat +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: "" + +metrics: +- metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: JsonStringVector +topics: +- name: "BEIR (v1.0.0): ArguAna" + id: test + path: topics.beir-v1.0.0-arguana.test.bge-base-en-v1.5.jsonl.gz + qrel: qrels.beir-v1.0.0-arguana.test.txt + +models: +- name: bge-flat-cached + display: BGE-base-en-v1.5 + type: flat + params: -generator VectorQueryGenerator -topicField vector -removeQuery -threads + 16 -hits 1000 + results: + nDCG@10: + - 0.6361 + R@100: + - 0.9915 + R@1000: + - 0.9964 diff --git a/src/main/resources/regression/beir-v1.0.0-arguana.bge-base-en-v1.5.parquet.flat.onnx.yaml 
b/src/main/resources/regression/beir-v1.0.0-arguana.bge-base-en-v1.5.parquet.flat.onnx.yaml new file mode 100644 index 000000000..8aa82ce2b --- /dev/null +++ b/src/main/resources/regression/beir-v1.0.0-arguana.bge-base-en-v1.5.parquet.flat.onnx.yaml @@ -0,0 +1,53 @@ +corpus: beir-v1.0.0-arguana.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/arguana.parquet + +index_path: indexes/parquet/arguana +index_type: flat +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: "" + +metrics: +- metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: TsvString +topics: +- name: "BEIR (v1.0.0): ArguAna" + id: test + path: topics.beir-v1.0.0-arguana.test.tsv.gz + qrel: qrels.beir-v1.0.0-arguana.test.txt + +models: +- name: bge-flat-onnx + display: BGE-base-en-v1.5 + type: flat + params: -generator VectorQueryGenerator -topicField title -removeQuery -threads + 16 -hits 1000 -encoder BgeBaseEn15 + results: + nDCG@10: + - 0.6361 + R@100: + - 0.9915 + R@1000: + - 0.9964 diff --git a/src/main/resources/regression/beir-v1.0.0-arguana.bge-base-en-v1.5.parquet.hnsw.cached.yaml b/src/main/resources/regression/beir-v1.0.0-arguana.bge-base-en-v1.5.parquet.hnsw.cached.yaml new file mode 100644 index 000000000..a89ca55d1 --- /dev/null +++ b/src/main/resources/regression/beir-v1.0.0-arguana.bge-base-en-v1.5.parquet.hnsw.cached.yaml @@ -0,0 +1,53 @@ +corpus: beir-v1.0.0-arguana.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/arguana.parquet + +index_path: 
indexes/parquet/arguana +index_type: hnsw +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: -M 16 -efC 100 + +metrics: +- metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: JsonStringVector +topics: +- name: "BEIR (v1.0.0): ArguAna" + id: test + path: topics.beir-v1.0.0-arguana.test.bge-base-en-v1.5.jsonl.gz + qrel: qrels.beir-v1.0.0-arguana.test.txt + +models: +- name: bge-hnsw-cached + display: BGE-base-en-v1.5 + type: hnsw + params: -generator VectorQueryGenerator -topicField vector -removeQuery -threads + 16 -hits 1000 -efSearch 1000 + results: + nDCG@10: + - 0.6361 + R@100: + - 0.9915 + R@1000: + - 0.9964 diff --git a/src/main/resources/regression/beir-v1.0.0-arguana.bge-base-en-v1.5.parquet.hnsw.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-arguana.bge-base-en-v1.5.parquet.hnsw.onnx.yaml new file mode 100644 index 000000000..95710334b --- /dev/null +++ b/src/main/resources/regression/beir-v1.0.0-arguana.bge-base-en-v1.5.parquet.hnsw.onnx.yaml @@ -0,0 +1,53 @@ +corpus: beir-v1.0.0-arguana.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/arguana.parquet + +index_path: indexes/parquet/arguana +index_type: hnsw +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: -M 16 -efC 100 -memoryBuffer 65536 -noMerge + +metrics: +- metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: 
false +- metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: TsvString +topics: +- name: "BEIR (v1.0.0): ArguAna" + id: test + path: topics.beir-v1.0.0-arguana.test.tsv.gz + qrel: qrels.beir-v1.0.0-arguana.test.txt + +models: +- name: bge-hnsw-onnx + display: BGE-base-en-v1.5 + type: hnsw + params: -generator VectorQueryGenerator -topicField title -removeQuery -threads + 16 -hits 1000 -efSearch 1000 -encoder BgeBaseEn15 + results: + nDCG@10: + - 0.6361 + R@100: + - 0.9915 + R@1000: + - 0.9964 diff --git a/src/main/resources/regression/beir-v1.0.0-bioasq.bge-base-en-v1.5.parquet.flat.cached.yaml b/src/main/resources/regression/beir-v1.0.0-bioasq.bge-base-en-v1.5.parquet.flat.cached.yaml new file mode 100644 index 000000000..e58ac778a --- /dev/null +++ b/src/main/resources/regression/beir-v1.0.0-bioasq.bge-base-en-v1.5.parquet.flat.cached.yaml @@ -0,0 +1,53 @@ +corpus: beir-v1.0.0-bioasq.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/bioasq.parquet + +index_path: indexes/parquet/bioasq +index_type: flat +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: "" + +metrics: +- metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: JsonStringVector +topics: +- name: "BEIR (v1.0.0): BioASQ" + id: test + path: 
topics.beir-v1.0.0-bioasq.test.bge-base-en-v1.5.jsonl.gz + qrel: qrels.beir-v1.0.0-bioasq.test.txt + +models: +- name: bge-flat-cached + display: BGE-base-en-v1.5 + type: flat + params: -generator VectorQueryGenerator -topicField vector -removeQuery -threads + 16 -hits 1000 + results: + nDCG@10: + - 0.4149 + R@100: + - 0.6317 + R@1000: + - 0.8059 diff --git a/src/main/resources/regression/beir-v1.0.0-bioasq.bge-base-en-v1.5.parquet.flat.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-bioasq.bge-base-en-v1.5.parquet.flat.onnx.yaml new file mode 100644 index 000000000..df8e805e4 --- /dev/null +++ b/src/main/resources/regression/beir-v1.0.0-bioasq.bge-base-en-v1.5.parquet.flat.onnx.yaml @@ -0,0 +1,53 @@ +corpus: beir-v1.0.0-bioasq.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/bioasq.parquet + +index_path: indexes/parquet/bioasq +index_type: flat +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: "" + +metrics: +- metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: TsvString +topics: +- name: "BEIR (v1.0.0): BioASQ" + id: test + path: topics.beir-v1.0.0-bioasq.test.tsv.gz + qrel: qrels.beir-v1.0.0-bioasq.test.txt + +models: +- name: bge-flat-onnx + display: BGE-base-en-v1.5 + type: flat + params: -generator VectorQueryGenerator -topicField title -removeQuery -threads + 16 -hits 1000 -encoder BgeBaseEn15 + results: + nDCG@10: + - 0.4149 + R@100: + - 0.6317 + R@1000: + - 0.8059 diff --git 
a/src/main/resources/regression/beir-v1.0.0-bioasq.bge-base-en-v1.5.parquet.hnsw.cached.yaml b/src/main/resources/regression/beir-v1.0.0-bioasq.bge-base-en-v1.5.parquet.hnsw.cached.yaml new file mode 100644 index 000000000..4d13f0fc9 --- /dev/null +++ b/src/main/resources/regression/beir-v1.0.0-bioasq.bge-base-en-v1.5.parquet.hnsw.cached.yaml @@ -0,0 +1,53 @@ +corpus: beir-v1.0.0-bioasq.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/bioasq.parquet + +index_path: indexes/parquet/bioasq +index_type: hnsw +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: -M 16 -efC 100 + +metrics: +- metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: JsonStringVector +topics: +- name: "BEIR (v1.0.0): BioASQ" + id: test + path: topics.beir-v1.0.0-bioasq.test.bge-base-en-v1.5.jsonl.gz + qrel: qrels.beir-v1.0.0-bioasq.test.txt + +models: +- name: bge-hnsw-cached + display: BGE-base-en-v1.5 + type: hnsw + params: -generator VectorQueryGenerator -topicField vector -removeQuery -threads + 16 -hits 1000 -efSearch 5000 + results: + nDCG@10: + - 0.4149 + R@100: + - 0.6317 + R@1000: + - 0.8059 diff --git a/src/main/resources/regression/beir-v1.0.0-bioasq.bge-base-en-v1.5.parquet.hnsw.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-bioasq.bge-base-en-v1.5.parquet.hnsw.onnx.yaml new file mode 100644 index 000000000..4b12f744c --- /dev/null +++ b/src/main/resources/regression/beir-v1.0.0-bioasq.bge-base-en-v1.5.parquet.hnsw.onnx.yaml @@ -0,0 +1,53 @@ +corpus: 
beir-v1.0.0-bioasq.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/bioasq.parquet + +index_path: indexes/parquet/bioasq +index_type: hnsw +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: -M 16 -efC 500 -noMerge + +metrics: +- metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: TsvString +topics: +- name: "BEIR (v1.0.0): BioASQ" + id: test + path: topics.beir-v1.0.0-bioasq.test.tsv.gz + qrel: qrels.beir-v1.0.0-bioasq.test.txt + +models: +- name: bge-hnsw-onnx + display: BGE-base-en-v1.5 + type: hnsw + params: -generator VectorQueryGenerator -topicField title -removeQuery -threads + 16 -hits 1000 -efSearch 5000 -encoder BgeBaseEn15 + results: + nDCG@10: + - 0.4149 + R@100: + - 0.6317 + R@1000: + - 0.8059 diff --git a/src/main/resources/regression/beir-v1.0.0-climate-fever.bge-base-en-v1.5.parquet.flat.cached.yaml b/src/main/resources/regression/beir-v1.0.0-climate-fever.bge-base-en-v1.5.parquet.flat.cached.yaml new file mode 100644 index 000000000..4ffa90bf9 --- /dev/null +++ b/src/main/resources/regression/beir-v1.0.0-climate-fever.bge-base-en-v1.5.parquet.flat.cached.yaml @@ -0,0 +1,53 @@ +corpus: beir-v1.0.0-climate-fever.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/climate-fever.parquet + +index_path: indexes/parquet/climate-fever +index_type: flat +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: "" + +metrics: +- metric: nDCG@10 + command: 
bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: JsonStringVector +topics: +- name: "BEIR (v1.0.0): Climate-FEVER" + id: test + path: topics.beir-v1.0.0-climate-fever.test.bge-base-en-v1.5.jsonl.gz + qrel: qrels.beir-v1.0.0-climate-fever.test.txt + +models: +- name: bge-flat-cached + display: BGE-base-en-v1.5 + type: flat + params: -generator VectorQueryGenerator -topicField vector -removeQuery -threads + 16 -hits 1000 + results: + nDCG@10: + - 0.3119 + R@100: + - 0.6362 + R@1000: + - 0.8307 diff --git a/src/main/resources/regression/beir-v1.0.0-climate-fever.bge-base-en-v1.5.parquet.flat.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-climate-fever.bge-base-en-v1.5.parquet.flat.onnx.yaml new file mode 100644 index 000000000..f20163e22 --- /dev/null +++ b/src/main/resources/regression/beir-v1.0.0-climate-fever.bge-base-en-v1.5.parquet.flat.onnx.yaml @@ -0,0 +1,53 @@ +corpus: beir-v1.0.0-climate-fever.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/climate-fever.parquet + +index_path: indexes/parquet/climate-fever +index_type: flat +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: "" + +metrics: +- metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" 
+ parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: TsvString +topics: +- name: "BEIR (v1.0.0): Climate-FEVER" + id: test + path: topics.beir-v1.0.0-climate-fever.test.tsv.gz + qrel: qrels.beir-v1.0.0-climate-fever.test.txt + +models: +- name: bge-flat-onnx + display: BGE-base-en-v1.5 + type: flat + params: -generator VectorQueryGenerator -topicField title -removeQuery -threads + 16 -hits 1000 -encoder BgeBaseEn15 + results: + nDCG@10: + - 0.3119 + R@100: + - 0.6362 + R@1000: + - 0.8307 diff --git a/src/main/resources/regression/beir-v1.0.0-climate-fever.bge-base-en-v1.5.parquet.hnsw.cached.yaml b/src/main/resources/regression/beir-v1.0.0-climate-fever.bge-base-en-v1.5.parquet.hnsw.cached.yaml new file mode 100644 index 000000000..39361e212 --- /dev/null +++ b/src/main/resources/regression/beir-v1.0.0-climate-fever.bge-base-en-v1.5.parquet.hnsw.cached.yaml @@ -0,0 +1,53 @@ +corpus: beir-v1.0.0-climate-fever.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/climate-fever.parquet + +index_path: indexes/parquet/climate-fever +index_type: hnsw +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: -M 16 -efC 100 + +metrics: +- metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: JsonStringVector +topics: +- name: "BEIR (v1.0.0): Climate-FEVER" + id: test + path: topics.beir-v1.0.0-climate-fever.test.bge-base-en-v1.5.jsonl.gz + qrel: qrels.beir-v1.0.0-climate-fever.test.txt + +models: +- name: bge-hnsw-cached + display: BGE-base-en-v1.5 + 
type: hnsw + params: -generator VectorQueryGenerator -topicField vector -removeQuery -threads + 16 -hits 1000 -efSearch 1000 + results: + nDCG@10: + - 0.3119 + R@100: + - 0.6362 + R@1000: + - 0.8307 diff --git a/src/main/resources/regression/beir-v1.0.0-climate-fever.bge-base-en-v1.5.parquet.hnsw.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-climate-fever.bge-base-en-v1.5.parquet.hnsw.onnx.yaml new file mode 100644 index 000000000..c770ad1a5 --- /dev/null +++ b/src/main/resources/regression/beir-v1.0.0-climate-fever.bge-base-en-v1.5.parquet.hnsw.onnx.yaml @@ -0,0 +1,53 @@ +corpus: beir-v1.0.0-climate-fever.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/climate-fever.parquet + +index_path: indexes/parquet/climate-fever +index_type: hnsw +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: -M 16 -efC 100 -memoryBuffer 65536 -noMerge + +metrics: +- metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: TsvString +topics: +- name: "BEIR (v1.0.0): Climate-FEVER" + id: test + path: topics.beir-v1.0.0-climate-fever.test.tsv.gz + qrel: qrels.beir-v1.0.0-climate-fever.test.txt + +models: +- name: bge-hnsw-onnx + display: BGE-base-en-v1.5 + type: hnsw + params: -generator VectorQueryGenerator -topicField title -removeQuery -threads + 16 -hits 1000 -efSearch 1000 -encoder BgeBaseEn15 + results: + nDCG@10: + - 0.3119 + R@100: + - 0.6362 + R@1000: + - 0.8307 diff --git 
a/src/main/resources/regression/beir-v1.0.0-cqadupstack-android.bge-base-en-v1.5.parquet.flat.cached.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-android.bge-base-en-v1.5.parquet.flat.cached.yaml new file mode 100644 index 000000000..42e6582a7 --- /dev/null +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-android.bge-base-en-v1.5.parquet.flat.cached.yaml @@ -0,0 +1,53 @@ +corpus: beir-v1.0.0-cqadupstack-android.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/cqadupstack-android.parquet + +index_path: indexes/parquet/cqadupstack-android +index_type: flat +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: "" + +metrics: +- metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: JsonStringVector +topics: +- name: "BEIR (v1.0.0): CQADupStack-android" + id: test + path: topics.beir-v1.0.0-cqadupstack-android.test.bge-base-en-v1.5.jsonl.gz + qrel: qrels.beir-v1.0.0-cqadupstack-android.test.txt + +models: +- name: bge-flat-cached + display: BGE-base-en-v1.5 + type: flat + params: -generator VectorQueryGenerator -topicField vector -removeQuery -threads + 16 -hits 1000 + results: + nDCG@10: + - 0.5075 + R@100: + - 0.8454 + R@1000: + - 0.9611 diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-android.bge-base-en-v1.5.parquet.flat.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-android.bge-base-en-v1.5.parquet.flat.onnx.yaml new file mode 100644 index 000000000..a233ece80 --- /dev/null +++ 
b/src/main/resources/regression/beir-v1.0.0-cqadupstack-android.bge-base-en-v1.5.parquet.flat.onnx.yaml @@ -0,0 +1,53 @@ +corpus: beir-v1.0.0-cqadupstack-android.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/cqadupstack-android.parquet + +index_path: indexes/parquet/cqadupstack-android +index_type: flat +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: "" + +metrics: +- metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: TsvString +topics: +- name: "BEIR (v1.0.0): CQADupStack-android" + id: test + path: topics.beir-v1.0.0-cqadupstack-android.test.tsv.gz + qrel: qrels.beir-v1.0.0-cqadupstack-android.test.txt + +models: +- name: bge-flat-onnx + display: BGE-base-en-v1.5 + type: flat + params: -generator VectorQueryGenerator -topicField title -removeQuery -threads + 16 -hits 1000 -encoder BgeBaseEn15 + results: + nDCG@10: + - 0.5075 + R@100: + - 0.8454 + R@1000: + - 0.9611 diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-android.bge-base-en-v1.5.parquet.hnsw.cached.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-android.bge-base-en-v1.5.parquet.hnsw.cached.yaml new file mode 100644 index 000000000..e65786088 --- /dev/null +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-android.bge-base-en-v1.5.parquet.hnsw.cached.yaml @@ -0,0 +1,53 @@ +corpus: beir-v1.0.0-cqadupstack-android.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/cqadupstack-android.parquet + +index_path: 
indexes/parquet/cqadupstack-android +index_type: hnsw +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: -M 16 -efC 100 + +metrics: +- metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: JsonStringVector +topics: +- name: "BEIR (v1.0.0): CQADupStack-android" + id: test + path: topics.beir-v1.0.0-cqadupstack-android.test.bge-base-en-v1.5.jsonl.gz + qrel: qrels.beir-v1.0.0-cqadupstack-android.test.txt + +models: +- name: bge-hnsw-cached + display: BGE-base-en-v1.5 + type: hnsw + params: -generator VectorQueryGenerator -topicField vector -removeQuery -threads + 16 -hits 1000 -efSearch 1000 + results: + nDCG@10: + - 0.5075 + R@100: + - 0.8454 + R@1000: + - 0.9611 diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-android.bge-base-en-v1.5.parquet.hnsw.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-android.bge-base-en-v1.5.parquet.hnsw.onnx.yaml new file mode 100644 index 000000000..6b5e1ceaa --- /dev/null +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-android.bge-base-en-v1.5.parquet.hnsw.onnx.yaml @@ -0,0 +1,53 @@ +corpus: beir-v1.0.0-cqadupstack-android.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/cqadupstack-android.parquet + +index_path: indexes/parquet/cqadupstack-android +index_type: hnsw +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: -M 16 -efC 100 -memoryBuffer 65536 -noMerge + +metrics: +- metric: nDCG@10 + 
command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: TsvString +topics: +- name: "BEIR (v1.0.0): CQADupStack-android" + id: test + path: topics.beir-v1.0.0-cqadupstack-android.test.tsv.gz + qrel: qrels.beir-v1.0.0-cqadupstack-android.test.txt + +models: +- name: bge-hnsw-onnx + display: BGE-base-en-v1.5 + type: hnsw + params: -generator VectorQueryGenerator -topicField title -removeQuery -threads + 16 -hits 1000 -efSearch 1000 -encoder BgeBaseEn15 + results: + nDCG@10: + - 0.5075 + R@100: + - 0.8454 + R@1000: + - 0.9611 diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-english.bge-base-en-v1.5.parquet.flat.cached.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-english.bge-base-en-v1.5.parquet.flat.cached.yaml new file mode 100644 index 000000000..868d8c696 --- /dev/null +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-english.bge-base-en-v1.5.parquet.flat.cached.yaml @@ -0,0 +1,53 @@ +corpus: beir-v1.0.0-cqadupstack-english.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/cqadupstack-english.parquet + +index_path: indexes/parquet/cqadupstack-english +index_type: flat +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: "" + +metrics: +- metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: 
R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: JsonStringVector +topics: +- name: "BEIR (v1.0.0): CQADupStack-english" + id: test + path: topics.beir-v1.0.0-cqadupstack-english.test.bge-base-en-v1.5.jsonl.gz + qrel: qrels.beir-v1.0.0-cqadupstack-english.test.txt + +models: +- name: bge-flat-cached + display: BGE-base-en-v1.5 + type: flat + params: -generator VectorQueryGenerator -topicField vector -removeQuery -threads + 16 -hits 1000 + results: + nDCG@10: + - 0.4857 + R@100: + - 0.7587 + R@1000: + - 0.8839 diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-english.bge-base-en-v1.5.parquet.flat.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-english.bge-base-en-v1.5.parquet.flat.onnx.yaml new file mode 100644 index 000000000..20475994e --- /dev/null +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-english.bge-base-en-v1.5.parquet.flat.onnx.yaml @@ -0,0 +1,53 @@ +corpus: beir-v1.0.0-cqadupstack-english.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/cqadupstack-english.parquet + +index_path: indexes/parquet/cqadupstack-english +index_type: flat +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: "" + +metrics: +- metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: TsvString +topics: +- name: "BEIR (v1.0.0): CQADupStack-english" + id: test + path: topics.beir-v1.0.0-cqadupstack-english.test.tsv.gz + qrel: 
qrels.beir-v1.0.0-cqadupstack-english.test.txt + +models: +- name: bge-flat-onnx + display: BGE-base-en-v1.5 + type: flat + params: -generator VectorQueryGenerator -topicField title -removeQuery -threads + 16 -hits 1000 -encoder BgeBaseEn15 + results: + nDCG@10: + - 0.4857 + R@100: + - 0.7587 + R@1000: + - 0.8839 diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-english.bge-base-en-v1.5.parquet.hnsw.cached.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-english.bge-base-en-v1.5.parquet.hnsw.cached.yaml new file mode 100644 index 000000000..ae630e78f --- /dev/null +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-english.bge-base-en-v1.5.parquet.hnsw.cached.yaml @@ -0,0 +1,53 @@ +corpus: beir-v1.0.0-cqadupstack-english.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/cqadupstack-english.parquet + +index_path: indexes/parquet/cqadupstack-english +index_type: hnsw +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: -M 16 -efC 100 + +metrics: +- metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: JsonStringVector +topics: +- name: "BEIR (v1.0.0): CQADupStack-english" + id: test + path: topics.beir-v1.0.0-cqadupstack-english.test.bge-base-en-v1.5.jsonl.gz + qrel: qrels.beir-v1.0.0-cqadupstack-english.test.txt + +models: +- name: bge-hnsw-cached + display: BGE-base-en-v1.5 + type: hnsw + params: -generator VectorQueryGenerator -topicField vector -removeQuery -threads + 16 -hits 1000 -efSearch 1000 + results: + nDCG@10: + - 
0.4857 + R@100: + - 0.7587 + R@1000: + - 0.8839 diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-english.bge-base-en-v1.5.parquet.hnsw.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-english.bge-base-en-v1.5.parquet.hnsw.onnx.yaml new file mode 100644 index 000000000..0e798089b --- /dev/null +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-english.bge-base-en-v1.5.parquet.hnsw.onnx.yaml @@ -0,0 +1,53 @@ +corpus: beir-v1.0.0-cqadupstack-english.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/cqadupstack-english.parquet + +index_path: indexes/parquet/cqadupstack-english +index_type: hnsw +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: -M 16 -efC 100 -memoryBuffer 65536 -noMerge + +metrics: +- metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: TsvString +topics: +- name: "BEIR (v1.0.0): CQADupStack-english" + id: test + path: topics.beir-v1.0.0-cqadupstack-english.test.tsv.gz + qrel: qrels.beir-v1.0.0-cqadupstack-english.test.txt + +models: +- name: bge-hnsw-onnx + display: BGE-base-en-v1.5 + type: hnsw + params: -generator VectorQueryGenerator -topicField title -removeQuery -threads + 16 -hits 1000 -efSearch 1000 -encoder BgeBaseEn15 + results: + nDCG@10: + - 0.4857 + R@100: + - 0.7587 + R@1000: + - 0.8839 diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5.parquet.flat.cached.yaml 
b/src/main/resources/regression/beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5.parquet.flat.cached.yaml new file mode 100644 index 000000000..94c97dd95 --- /dev/null +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5.parquet.flat.cached.yaml @@ -0,0 +1,53 @@ +corpus: beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/cqadupstack-gaming.parquet + +index_path: indexes/parquet/cqadupstack-gaming +index_type: flat +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: "" + +metrics: +- metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: JsonStringVector +topics: +- name: "BEIR (v1.0.0): CQADupStack-gaming" + id: test + path: topics.beir-v1.0.0-cqadupstack-gaming.test.bge-base-en-v1.5.jsonl.gz + qrel: qrels.beir-v1.0.0-cqadupstack-gaming.test.txt + +models: +- name: bge-flat-cached + display: BGE-base-en-v1.5 + type: flat + params: -generator VectorQueryGenerator -topicField vector -removeQuery -threads + 16 -hits 1000 + results: + nDCG@10: + - 0.5965 + R@100: + - 0.9036 + R@1000: + - 0.9719 diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5.parquet.flat.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5.parquet.flat.onnx.yaml new file mode 100644 index 000000000..1c62465c0 --- /dev/null +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5.parquet.flat.onnx.yaml @@ -0,0 +1,53 @@ +corpus: 
beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/cqadupstack-gaming.parquet + +index_path: indexes/parquet/cqadupstack-gaming +index_type: flat +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: "" + +metrics: +- metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: TsvString +topics: +- name: "BEIR (v1.0.0): CQADupStack-gaming" + id: test + path: topics.beir-v1.0.0-cqadupstack-gaming.test.tsv.gz + qrel: qrels.beir-v1.0.0-cqadupstack-gaming.test.txt + +models: +- name: bge-flat-onnx + display: BGE-base-en-v1.5 + type: flat + params: -generator VectorQueryGenerator -topicField title -removeQuery -threads + 16 -hits 1000 -encoder BgeBaseEn15 + results: + nDCG@10: + - 0.5965 + R@100: + - 0.9036 + R@1000: + - 0.9719 diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5.parquet.hnsw.cached.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5.parquet.hnsw.cached.yaml new file mode 100644 index 000000000..0542cdae6 --- /dev/null +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5.parquet.hnsw.cached.yaml @@ -0,0 +1,53 @@ +corpus: beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/cqadupstack-gaming.parquet + +index_path: indexes/parquet/cqadupstack-gaming +index_type: hnsw +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator 
+index_threads: 16 +index_options: -M 16 -efC 100 + +metrics: +- metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: JsonStringVector +topics: +- name: "BEIR (v1.0.0): CQADupStack-gaming" + id: test + path: topics.beir-v1.0.0-cqadupstack-gaming.test.bge-base-en-v1.5.jsonl.gz + qrel: qrels.beir-v1.0.0-cqadupstack-gaming.test.txt + +models: +- name: bge-hnsw-cached + display: BGE-base-en-v1.5 + type: hnsw + params: -generator VectorQueryGenerator -topicField vector -removeQuery -threads + 16 -hits 1000 -efSearch 1000 + results: + nDCG@10: + - 0.5965 + R@100: + - 0.9036 + R@1000: + - 0.9719 diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5.parquet.hnsw.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5.parquet.hnsw.onnx.yaml new file mode 100644 index 000000000..95e0667c4 --- /dev/null +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5.parquet.hnsw.onnx.yaml @@ -0,0 +1,53 @@ +corpus: beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/cqadupstack-gaming.parquet + +index_path: indexes/parquet/cqadupstack-gaming +index_type: hnsw +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: -M 16 -efC 100 -memoryBuffer 65536 -noMerge + +metrics: +- metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@100 + command: bin/trec_eval + 
params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: TsvString +topics: +- name: "BEIR (v1.0.0): CQADupStack-gaming" + id: test + path: topics.beir-v1.0.0-cqadupstack-gaming.test.tsv.gz + qrel: qrels.beir-v1.0.0-cqadupstack-gaming.test.txt + +models: +- name: bge-hnsw-onnx + display: BGE-base-en-v1.5 + type: hnsw + params: -generator VectorQueryGenerator -topicField title -removeQuery -threads + 16 -hits 1000 -efSearch 1000 -encoder BgeBaseEn15 + results: + nDCG@10: + - 0.5965 + R@100: + - 0.9036 + R@1000: + - 0.9719 diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-gis.bge-base-en-v1.5.parquet.flat.cached.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-gis.bge-base-en-v1.5.parquet.flat.cached.yaml new file mode 100644 index 000000000..ab56e7b69 --- /dev/null +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-gis.bge-base-en-v1.5.parquet.flat.cached.yaml @@ -0,0 +1,53 @@ +corpus: beir-v1.0.0-cqadupstack-gis.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/cqadupstack-gis.parquet + +index_path: indexes/parquet/cqadupstack-gis +index_type: flat +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: "" + +metrics: +- metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: JsonStringVector +topics: +- name: "BEIR 
(v1.0.0): CQADupStack-gis" + id: test + path: topics.beir-v1.0.0-cqadupstack-gis.test.bge-base-en-v1.5.jsonl.gz + qrel: qrels.beir-v1.0.0-cqadupstack-gis.test.txt + +models: +- name: bge-flat-cached + display: BGE-base-en-v1.5 + type: flat + params: -generator VectorQueryGenerator -topicField vector -removeQuery -threads + 16 -hits 1000 + results: + nDCG@10: + - 0.4127 + R@100: + - 0.7682 + R@1000: + - 0.9117 diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-gis.bge-base-en-v1.5.parquet.flat.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-gis.bge-base-en-v1.5.parquet.flat.onnx.yaml new file mode 100644 index 000000000..5a70b06db --- /dev/null +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-gis.bge-base-en-v1.5.parquet.flat.onnx.yaml @@ -0,0 +1,53 @@ +corpus: beir-v1.0.0-cqadupstack-gis.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/cqadupstack-gis.parquet + +index_path: indexes/parquet/cqadupstack-gis +index_type: flat +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: "" + +metrics: +- metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: TsvString +topics: +- name: "BEIR (v1.0.0): CQADupStack-gis" + id: test + path: topics.beir-v1.0.0-cqadupstack-gis.test.tsv.gz + qrel: qrels.beir-v1.0.0-cqadupstack-gis.test.txt + +models: +- name: bge-flat-onnx + display: BGE-base-en-v1.5 + type: flat + params: -generator VectorQueryGenerator -topicField title -removeQuery -threads + 16 -hits 1000 -encoder BgeBaseEn15 + 
results: + nDCG@10: + - 0.4127 + R@100: + - 0.7682 + R@1000: + - 0.9117 diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-gis.bge-base-en-v1.5.parquet.hnsw.cached.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-gis.bge-base-en-v1.5.parquet.hnsw.cached.yaml new file mode 100644 index 000000000..2a67b5b2e --- /dev/null +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-gis.bge-base-en-v1.5.parquet.hnsw.cached.yaml @@ -0,0 +1,53 @@ +corpus: beir-v1.0.0-cqadupstack-gis.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/cqadupstack-gis.parquet + +index_path: indexes/parquet/cqadupstack-gis +index_type: hnsw +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: -M 16 -efC 100 + +metrics: +- metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: JsonStringVector +topics: +- name: "BEIR (v1.0.0): CQADupStack-gis" + id: test + path: topics.beir-v1.0.0-cqadupstack-gis.test.bge-base-en-v1.5.jsonl.gz + qrel: qrels.beir-v1.0.0-cqadupstack-gis.test.txt + +models: +- name: bge-hnsw-cached + display: BGE-base-en-v1.5 + type: hnsw + params: -generator VectorQueryGenerator -topicField vector -removeQuery -threads + 16 -hits 1000 -efSearch 1000 + results: + nDCG@10: + - 0.4127 + R@100: + - 0.7682 + R@1000: + - 0.9117 diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-gis.bge-base-en-v1.5.parquet.hnsw.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-gis.bge-base-en-v1.5.parquet.hnsw.onnx.yaml new file mode 100644 
index 000000000..f60854cee --- /dev/null +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-gis.bge-base-en-v1.5.parquet.hnsw.onnx.yaml @@ -0,0 +1,53 @@ +corpus: beir-v1.0.0-cqadupstack-gis.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/cqadupstack-gis.parquet + +index_path: indexes/parquet/cqadupstack-gis +index_type: hnsw +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: -M 16 -efC 100 -memoryBuffer 65536 -noMerge + +metrics: +- metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: TsvString +topics: +- name: "BEIR (v1.0.0): CQADupStack-gis" + id: test + path: topics.beir-v1.0.0-cqadupstack-gis.test.tsv.gz + qrel: qrels.beir-v1.0.0-cqadupstack-gis.test.txt + +models: +- name: bge-hnsw-onnx + display: BGE-base-en-v1.5 + type: hnsw + params: -generator VectorQueryGenerator -topicField title -removeQuery -threads + 16 -hits 1000 -efSearch 1000 -encoder BgeBaseEn15 + results: + nDCG@10: + - 0.4127 + R@100: + - 0.7682 + R@1000: + - 0.9117 diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-mathematica.bge-base-en-v1.5.parquet.flat.cached.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-mathematica.bge-base-en-v1.5.parquet.flat.cached.yaml new file mode 100644 index 000000000..c4fe8bf38 --- /dev/null +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-mathematica.bge-base-en-v1.5.parquet.flat.cached.yaml @@ -0,0 +1,53 @@ +corpus: beir-v1.0.0-cqadupstack-mathematica.bge-base-en-v1.5 +corpus_path: 
collections/beir-v1.0.0/bge-base-en-v1.5/cqadupstack-mathematica.parquet + +index_path: indexes/parquet/cqadupstack-mathematica +index_type: flat +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: "" + +metrics: +- metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: JsonStringVector +topics: +- name: "BEIR (v1.0.0): CQADupStack-mathematica" + id: test + path: topics.beir-v1.0.0-cqadupstack-mathematica.test.bge-base-en-v1.5.jsonl.gz + qrel: qrels.beir-v1.0.0-cqadupstack-mathematica.test.txt + +models: +- name: bge-flat-cached + display: BGE-base-en-v1.5 + type: flat + params: -generator VectorQueryGenerator -topicField vector -removeQuery -threads + 16 -hits 1000 + results: + nDCG@10: + - 0.3163 + R@100: + - 0.6922 + R@1000: + - 0.8810 diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-mathematica.bge-base-en-v1.5.parquet.flat.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-mathematica.bge-base-en-v1.5.parquet.flat.onnx.yaml new file mode 100644 index 000000000..60b7d7a50 --- /dev/null +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-mathematica.bge-base-en-v1.5.parquet.flat.onnx.yaml @@ -0,0 +1,53 @@ +corpus: beir-v1.0.0-cqadupstack-mathematica.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/cqadupstack-mathematica.parquet + +index_path: indexes/parquet/cqadupstack-mathematica +index_type: flat +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 
+index_options: "" + +metrics: +- metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: TsvString +topics: +- name: "BEIR (v1.0.0): CQADupStack-mathematica" + id: test + path: topics.beir-v1.0.0-cqadupstack-mathematica.test.tsv.gz + qrel: qrels.beir-v1.0.0-cqadupstack-mathematica.test.txt + +models: +- name: bge-flat-onnx + display: BGE-base-en-v1.5 + type: flat + params: -generator VectorQueryGenerator -topicField title -removeQuery -threads + 16 -hits 1000 -encoder BgeBaseEn15 + results: + nDCG@10: + - 0.3163 + R@100: + - 0.6922 + R@1000: + - 0.8810 diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-mathematica.bge-base-en-v1.5.parquet.hnsw.cached.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-mathematica.bge-base-en-v1.5.parquet.hnsw.cached.yaml new file mode 100644 index 000000000..c8ef4336b --- /dev/null +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-mathematica.bge-base-en-v1.5.parquet.hnsw.cached.yaml @@ -0,0 +1,53 @@ +corpus: beir-v1.0.0-cqadupstack-mathematica.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/cqadupstack-mathematica.parquet + +index_path: indexes/parquet/cqadupstack-mathematica +index_type: hnsw +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: -M 16 -efC 100 + +metrics: +- metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + 
separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: JsonStringVector +topics: +- name: "BEIR (v1.0.0): CQADupStack-mathematica" + id: test + path: topics.beir-v1.0.0-cqadupstack-mathematica.test.bge-base-en-v1.5.jsonl.gz + qrel: qrels.beir-v1.0.0-cqadupstack-mathematica.test.txt + +models: +- name: bge-hnsw-cached + display: BGE-base-en-v1.5 + type: hnsw + params: -generator VectorQueryGenerator -topicField vector -removeQuery -threads + 16 -hits 1000 -efSearch 1000 + results: + nDCG@10: + - 0.3163 + R@100: + - 0.6922 + R@1000: + - 0.8810 diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-mathematica.bge-base-en-v1.5.parquet.hnsw.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-mathematica.bge-base-en-v1.5.parquet.hnsw.onnx.yaml new file mode 100644 index 000000000..1445a34e2 --- /dev/null +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-mathematica.bge-base-en-v1.5.parquet.hnsw.onnx.yaml @@ -0,0 +1,53 @@ +corpus: beir-v1.0.0-cqadupstack-mathematica.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/cqadupstack-mathematica.parquet + +index_path: indexes/parquet/cqadupstack-mathematica +index_type: hnsw +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: -M 16 -efC 100 -memoryBuffer 65536 -noMerge + +metrics: +- metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + 
can_combine: false + +topic_reader: TsvString +topics: +- name: "BEIR (v1.0.0): CQADupStack-mathematica" + id: test + path: topics.beir-v1.0.0-cqadupstack-mathematica.test.tsv.gz + qrel: qrels.beir-v1.0.0-cqadupstack-mathematica.test.txt + +models: +- name: bge-hnsw-onnx + display: BGE-base-en-v1.5 + type: hnsw + params: -generator VectorQueryGenerator -topicField title -removeQuery -threads + 16 -hits 1000 -efSearch 1000 -encoder BgeBaseEn15 + results: + nDCG@10: + - 0.3163 + R@100: + - 0.6922 + R@1000: + - 0.8810 diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5.parquet.flat.cached.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5.parquet.flat.cached.yaml new file mode 100644 index 000000000..433d2d6eb --- /dev/null +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5.parquet.flat.cached.yaml @@ -0,0 +1,53 @@ +corpus: beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/cqadupstack-physics.parquet + +index_path: indexes/parquet/cqadupstack-physics +index_type: flat +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: "" + +metrics: +- metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: JsonStringVector +topics: +- name: "BEIR (v1.0.0): CQADupStack-physics" + id: test + path: topics.beir-v1.0.0-cqadupstack-physics.test.bge-base-en-v1.5.jsonl.gz + qrel: qrels.beir-v1.0.0-cqadupstack-physics.test.txt + +models: +- name: 
bge-flat-cached + display: BGE-base-en-v1.5 + type: flat + params: -generator VectorQueryGenerator -topicField vector -removeQuery -threads + 16 -hits 1000 + results: + nDCG@10: + - 0.4722 + R@100: + - 0.8081 + R@1000: + - 0.9406 diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5.parquet.flat.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5.parquet.flat.onnx.yaml new file mode 100644 index 000000000..f61588dfa --- /dev/null +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5.parquet.flat.onnx.yaml @@ -0,0 +1,53 @@ +corpus: beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/cqadupstack-physics.parquet + +index_path: indexes/parquet/cqadupstack-physics +index_type: flat +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: "" + +metrics: +- metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: TsvString +topics: +- name: "BEIR (v1.0.0): CQADupStack-physics" + id: test + path: topics.beir-v1.0.0-cqadupstack-physics.test.tsv.gz + qrel: qrels.beir-v1.0.0-cqadupstack-physics.test.txt + +models: +- name: bge-flat-onnx + display: BGE-base-en-v1.5 + type: flat + params: -generator VectorQueryGenerator -topicField title -removeQuery -threads + 16 -hits 1000 -encoder BgeBaseEn15 + results: + nDCG@10: + - 0.4722 + R@100: + - 0.8081 + R@1000: + - 0.9406 diff --git 
a/src/main/resources/regression/beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5.parquet.hnsw.cached.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5.parquet.hnsw.cached.yaml new file mode 100644 index 000000000..d119cf03a --- /dev/null +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5.parquet.hnsw.cached.yaml @@ -0,0 +1,53 @@ +corpus: beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/cqadupstack-physics.parquet + +index_path: indexes/parquet/cqadupstack-physics +index_type: hnsw +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: -M 16 -efC 100 + +metrics: +- metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: JsonStringVector +topics: +- name: "BEIR (v1.0.0): CQADupStack-physics" + id: test + path: topics.beir-v1.0.0-cqadupstack-physics.test.bge-base-en-v1.5.jsonl.gz + qrel: qrels.beir-v1.0.0-cqadupstack-physics.test.txt + +models: +- name: bge-hnsw-cached + display: BGE-base-en-v1.5 + type: hnsw + params: -generator VectorQueryGenerator -topicField vector -removeQuery -threads + 16 -hits 1000 -efSearch 1000 + results: + nDCG@10: + - 0.4722 + R@100: + - 0.8081 + R@1000: + - 0.9406 diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5.parquet.hnsw.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5.parquet.hnsw.onnx.yaml new file mode 100644 index 000000000..884097cc7 --- /dev/null 
+++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5.parquet.hnsw.onnx.yaml @@ -0,0 +1,53 @@ +corpus: beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/cqadupstack-physics.parquet + +index_path: indexes/parquet/cqadupstack-physics +index_type: hnsw +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: -M 16 -efC 100 -memoryBuffer 65536 -noMerge + +metrics: +- metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: TsvString +topics: +- name: "BEIR (v1.0.0): CQADupStack-physics" + id: test + path: topics.beir-v1.0.0-cqadupstack-physics.test.tsv.gz + qrel: qrels.beir-v1.0.0-cqadupstack-physics.test.txt + +models: +- name: bge-hnsw-onnx + display: BGE-base-en-v1.5 + type: hnsw + params: -generator VectorQueryGenerator -topicField title -removeQuery -threads + 16 -hits 1000 -efSearch 1000 -encoder BgeBaseEn15 + results: + nDCG@10: + - 0.4722 + R@100: + - 0.8081 + R@1000: + - 0.9406 diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-programmers.bge-base-en-v1.5.parquet.flat.cached.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-programmers.bge-base-en-v1.5.parquet.flat.cached.yaml new file mode 100644 index 000000000..e8eef58f3 --- /dev/null +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-programmers.bge-base-en-v1.5.parquet.flat.cached.yaml @@ -0,0 +1,53 @@ +corpus: beir-v1.0.0-cqadupstack-programmers.bge-base-en-v1.5 +corpus_path: 
collections/beir-v1.0.0/bge-base-en-v1.5/cqadupstack-programmers.parquet + +index_path: indexes/parquet/cqadupstack-programmers +index_type: flat +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: "" + +metrics: +- metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: JsonStringVector +topics: +- name: "BEIR (v1.0.0): CQADupStack-programmers" + id: test + path: topics.beir-v1.0.0-cqadupstack-programmers.test.bge-base-en-v1.5.jsonl.gz + qrel: qrels.beir-v1.0.0-cqadupstack-programmers.test.txt + +models: +- name: bge-flat-cached + display: BGE-base-en-v1.5 + type: flat + params: -generator VectorQueryGenerator -topicField vector -removeQuery -threads + 16 -hits 1000 + results: + nDCG@10: + - 0.4242 + R@100: + - 0.7856 + R@1000: + - 0.9348 diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-programmers.bge-base-en-v1.5.parquet.flat.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-programmers.bge-base-en-v1.5.parquet.flat.onnx.yaml new file mode 100644 index 000000000..ec1cfbd7b --- /dev/null +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-programmers.bge-base-en-v1.5.parquet.flat.onnx.yaml @@ -0,0 +1,53 @@ +corpus: beir-v1.0.0-cqadupstack-programmers.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/cqadupstack-programmers.parquet + +index_path: indexes/parquet/cqadupstack-programmers +index_type: flat +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 
+index_options: "" + +metrics: +- metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: TsvString +topics: +- name: "BEIR (v1.0.0): CQADupStack-programmers" + id: test + path: topics.beir-v1.0.0-cqadupstack-programmers.test.tsv.gz + qrel: qrels.beir-v1.0.0-cqadupstack-programmers.test.txt + +models: +- name: bge-flat-onnx + display: BGE-base-en-v1.5 + type: flat + params: -generator VectorQueryGenerator -topicField title -removeQuery -threads + 16 -hits 1000 -encoder BgeBaseEn15 + results: + nDCG@10: + - 0.4242 + R@100: + - 0.7856 + R@1000: + - 0.9348 diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-programmers.bge-base-en-v1.5.parquet.hnsw.cached.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-programmers.bge-base-en-v1.5.parquet.hnsw.cached.yaml new file mode 100644 index 000000000..138fb8efd --- /dev/null +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-programmers.bge-base-en-v1.5.parquet.hnsw.cached.yaml @@ -0,0 +1,53 @@ +corpus: beir-v1.0.0-cqadupstack-programmers.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/cqadupstack-programmers.parquet + +index_path: indexes/parquet/cqadupstack-programmers +index_type: hnsw +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: -M 16 -efC 100 + +metrics: +- metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + 
separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: JsonStringVector +topics: +- name: "BEIR (v1.0.0): CQADupStack-programmers" + id: test + path: topics.beir-v1.0.0-cqadupstack-programmers.test.bge-base-en-v1.5.jsonl.gz + qrel: qrels.beir-v1.0.0-cqadupstack-programmers.test.txt + +models: +- name: bge-hnsw-cached + display: BGE-base-en-v1.5 + type: hnsw + params: -generator VectorQueryGenerator -topicField vector -removeQuery -threads + 16 -hits 1000 -efSearch 1000 + results: + nDCG@10: + - 0.4242 + R@100: + - 0.7856 + R@1000: + - 0.9348 diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-programmers.bge-base-en-v1.5.parquet.hnsw.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-programmers.bge-base-en-v1.5.parquet.hnsw.onnx.yaml new file mode 100644 index 000000000..ae70952f3 --- /dev/null +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-programmers.bge-base-en-v1.5.parquet.hnsw.onnx.yaml @@ -0,0 +1,53 @@ +corpus: beir-v1.0.0-cqadupstack-programmers.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/cqadupstack-programmers.parquet + +index_path: indexes/parquet/cqadupstack-programmers +index_type: hnsw +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: -M 16 -efC 100 -memoryBuffer 65536 -noMerge + +metrics: +- metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + 
can_combine: false + +topic_reader: TsvString +topics: +- name: "BEIR (v1.0.0): CQADupStack-programmers" + id: test + path: topics.beir-v1.0.0-cqadupstack-programmers.test.tsv.gz + qrel: qrels.beir-v1.0.0-cqadupstack-programmers.test.txt + +models: +- name: bge-hnsw-onnx + display: BGE-base-en-v1.5 + type: hnsw + params: -generator VectorQueryGenerator -topicField title -removeQuery -threads + 16 -hits 1000 -efSearch 1000 -encoder BgeBaseEn15 + results: + nDCG@10: + - 0.4242 + R@100: + - 0.7856 + R@1000: + - 0.9348 diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-stats.bge-base-en-v1.5.parquet.flat.cached.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-stats.bge-base-en-v1.5.parquet.flat.cached.yaml new file mode 100644 index 000000000..9a83f02c8 --- /dev/null +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-stats.bge-base-en-v1.5.parquet.flat.cached.yaml @@ -0,0 +1,53 @@ +corpus: beir-v1.0.0-cqadupstack-stats.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/cqadupstack-stats.parquet + +index_path: indexes/parquet/cqadupstack-stats +index_type: flat +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: "" + +metrics: +- metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: JsonStringVector +topics: +- name: "BEIR (v1.0.0): CQADupStack-stats" + id: test + path: topics.beir-v1.0.0-cqadupstack-stats.test.bge-base-en-v1.5.jsonl.gz + qrel: qrels.beir-v1.0.0-cqadupstack-stats.test.txt + +models: +- name: bge-flat-cached + 
display: BGE-base-en-v1.5 + type: flat + params: -generator VectorQueryGenerator -topicField vector -removeQuery -threads + 16 -hits 1000 + results: + nDCG@10: + - 0.3732 + R@100: + - 0.6727 + R@1000: + - 0.8445 diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-stats.bge-base-en-v1.5.parquet.flat.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-stats.bge-base-en-v1.5.parquet.flat.onnx.yaml new file mode 100644 index 000000000..cee72e909 --- /dev/null +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-stats.bge-base-en-v1.5.parquet.flat.onnx.yaml @@ -0,0 +1,53 @@ +corpus: beir-v1.0.0-cqadupstack-stats.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/cqadupstack-stats.parquet + +index_path: indexes/parquet/cqadupstack-stats +index_type: flat +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: "" + +metrics: +- metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: TsvString +topics: +- name: "BEIR (v1.0.0): CQADupStack-stats" + id: test + path: topics.beir-v1.0.0-cqadupstack-stats.test.tsv.gz + qrel: qrels.beir-v1.0.0-cqadupstack-stats.test.txt + +models: +- name: bge-flat-onnx + display: BGE-base-en-v1.5 + type: flat + params: -generator VectorQueryGenerator -topicField title -removeQuery -threads + 16 -hits 1000 -encoder BgeBaseEn15 + results: + nDCG@10: + - 0.3732 + R@100: + - 0.6727 + R@1000: + - 0.8445 diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-stats.bge-base-en-v1.5.parquet.hnsw.cached.yaml 
b/src/main/resources/regression/beir-v1.0.0-cqadupstack-stats.bge-base-en-v1.5.parquet.hnsw.cached.yaml new file mode 100644 index 000000000..f9602e3dc --- /dev/null +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-stats.bge-base-en-v1.5.parquet.hnsw.cached.yaml @@ -0,0 +1,53 @@ +corpus: beir-v1.0.0-cqadupstack-stats.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/cqadupstack-stats.parquet + +index_path: indexes/parquet/cqadupstack-stats +index_type: hnsw +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: -M 16 -efC 100 + +metrics: +- metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: JsonStringVector +topics: +- name: "BEIR (v1.0.0): CQADupStack-stats" + id: test + path: topics.beir-v1.0.0-cqadupstack-stats.test.bge-base-en-v1.5.jsonl.gz + qrel: qrels.beir-v1.0.0-cqadupstack-stats.test.txt + +models: +- name: bge-hnsw-cached + display: BGE-base-en-v1.5 + type: hnsw + params: -generator VectorQueryGenerator -topicField vector -removeQuery -threads + 16 -hits 1000 -efSearch 1000 + results: + nDCG@10: + - 0.3732 + R@100: + - 0.6727 + R@1000: + - 0.8445 diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-stats.bge-base-en-v1.5.parquet.hnsw.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-stats.bge-base-en-v1.5.parquet.hnsw.onnx.yaml new file mode 100644 index 000000000..1cb01597f --- /dev/null +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-stats.bge-base-en-v1.5.parquet.hnsw.onnx.yaml @@ -0,0 +1,53 @@ 
+corpus: beir-v1.0.0-cqadupstack-stats.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/cqadupstack-stats.parquet + +index_path: indexes/parquet/cqadupstack-stats +index_type: hnsw +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: -M 16 -efC 100 -memoryBuffer 65536 -noMerge + +metrics: +- metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: TsvString +topics: +- name: "BEIR (v1.0.0): CQADupStack-stats" + id: test + path: topics.beir-v1.0.0-cqadupstack-stats.test.tsv.gz + qrel: qrels.beir-v1.0.0-cqadupstack-stats.test.txt + +models: +- name: bge-hnsw-onnx + display: BGE-base-en-v1.5 + type: hnsw + params: -generator VectorQueryGenerator -topicField title -removeQuery -threads + 16 -hits 1000 -efSearch 1000 -encoder BgeBaseEn15 + results: + nDCG@10: + - 0.3732 + R@100: + - 0.6727 + R@1000: + - 0.8445 diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-tex.bge-base-en-v1.5.parquet.flat.cached.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-tex.bge-base-en-v1.5.parquet.flat.cached.yaml new file mode 100644 index 000000000..3d6709246 --- /dev/null +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-tex.bge-base-en-v1.5.parquet.flat.cached.yaml @@ -0,0 +1,53 @@ +corpus: beir-v1.0.0-cqadupstack-tex.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/cqadupstack-tex.parquet + +index_path: indexes/parquet/cqadupstack-tex +index_type: flat +collection_class: ParquetDenseVectorCollection +generator_class: 
ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: "" + +metrics: +- metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: JsonStringVector +topics: +- name: "BEIR (v1.0.0): CQADupStack-tex" + id: test + path: topics.beir-v1.0.0-cqadupstack-tex.test.bge-base-en-v1.5.jsonl.gz + qrel: qrels.beir-v1.0.0-cqadupstack-tex.test.txt + +models: +- name: bge-flat-cached + display: BGE-base-en-v1.5 + type: flat + params: -generator VectorQueryGenerator -topicField vector -removeQuery -threads + 16 -hits 1000 + results: + nDCG@10: + - 0.3115 + R@100: + - 0.6486 + R@1000: + - 0.8537 diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-tex.bge-base-en-v1.5.parquet.flat.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-tex.bge-base-en-v1.5.parquet.flat.onnx.yaml new file mode 100644 index 000000000..00f2696cc --- /dev/null +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-tex.bge-base-en-v1.5.parquet.flat.onnx.yaml @@ -0,0 +1,53 @@ +corpus: beir-v1.0.0-cqadupstack-tex.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/cqadupstack-tex.parquet + +index_path: indexes/parquet/cqadupstack-tex +index_type: flat +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: "" + +metrics: +- metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 
2 + metric_precision: 4 + can_combine: false +- metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: TsvString +topics: +- name: "BEIR (v1.0.0): CQADupStack-tex" + id: test + path: topics.beir-v1.0.0-cqadupstack-tex.test.tsv.gz + qrel: qrels.beir-v1.0.0-cqadupstack-tex.test.txt + +models: +- name: bge-flat-onnx + display: BGE-base-en-v1.5 + type: flat + params: -generator VectorQueryGenerator -topicField title -removeQuery -threads + 16 -hits 1000 -encoder BgeBaseEn15 + results: + nDCG@10: + - 0.3115 + R@100: + - 0.6486 + R@1000: + - 0.8537 diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-tex.bge-base-en-v1.5.parquet.hnsw.cached.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-tex.bge-base-en-v1.5.parquet.hnsw.cached.yaml new file mode 100644 index 000000000..e253333f9 --- /dev/null +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-tex.bge-base-en-v1.5.parquet.hnsw.cached.yaml @@ -0,0 +1,53 @@ +corpus: beir-v1.0.0-cqadupstack-tex.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/cqadupstack-tex.parquet + +index_path: indexes/parquet/cqadupstack-tex +index_type: hnsw +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: -M 16 -efC 100 + +metrics: +- metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: JsonStringVector +topics: +- name: "BEIR (v1.0.0): CQADupStack-tex" + id: test + path: 
topics.beir-v1.0.0-cqadupstack-tex.test.bge-base-en-v1.5.jsonl.gz + qrel: qrels.beir-v1.0.0-cqadupstack-tex.test.txt + +models: +- name: bge-hnsw-cached + display: BGE-base-en-v1.5 + type: hnsw + params: -generator VectorQueryGenerator -topicField vector -removeQuery -threads + 16 -hits 1000 -efSearch 1000 + results: + nDCG@10: + - 0.3115 + R@100: + - 0.6486 + R@1000: + - 0.8537 diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-tex.bge-base-en-v1.5.parquet.hnsw.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-tex.bge-base-en-v1.5.parquet.hnsw.onnx.yaml new file mode 100644 index 000000000..725d8f8ee --- /dev/null +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-tex.bge-base-en-v1.5.parquet.hnsw.onnx.yaml @@ -0,0 +1,53 @@ +corpus: beir-v1.0.0-cqadupstack-tex.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/cqadupstack-tex.parquet + +index_path: indexes/parquet/cqadupstack-tex +index_type: hnsw +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: -M 16 -efC 100 -memoryBuffer 65536 -noMerge + +metrics: +- metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: TsvString +topics: +- name: "BEIR (v1.0.0): CQADupStack-tex" + id: test + path: topics.beir-v1.0.0-cqadupstack-tex.test.tsv.gz + qrel: qrels.beir-v1.0.0-cqadupstack-tex.test.txt + +models: +- name: bge-hnsw-onnx + display: BGE-base-en-v1.5 + type: hnsw + params: -generator VectorQueryGenerator -topicField title -removeQuery -threads + 16 -hits 1000 -efSearch 1000 
-encoder BgeBaseEn15 + results: + nDCG@10: + - 0.3115 + R@100: + - 0.6486 + R@1000: + - 0.8537 diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-unix.bge-base-en-v1.5.parquet.flat.cached.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-unix.bge-base-en-v1.5.parquet.flat.cached.yaml new file mode 100644 index 000000000..cde499810 --- /dev/null +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-unix.bge-base-en-v1.5.parquet.flat.cached.yaml @@ -0,0 +1,53 @@ +corpus: beir-v1.0.0-cqadupstack-unix.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/cqadupstack-unix.parquet + +index_path: indexes/parquet/cqadupstack-unix +index_type: flat +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: "" + +metrics: +- metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: JsonStringVector +topics: +- name: "BEIR (v1.0.0): CQADupStack-unix" + id: test + path: topics.beir-v1.0.0-cqadupstack-unix.test.bge-base-en-v1.5.jsonl.gz + qrel: qrels.beir-v1.0.0-cqadupstack-unix.test.txt + +models: +- name: bge-flat-cached + display: BGE-base-en-v1.5 + type: flat + params: -generator VectorQueryGenerator -topicField vector -removeQuery -threads + 16 -hits 1000 + results: + nDCG@10: + - 0.4219 + R@100: + - 0.7797 + R@1000: + - 0.9237 diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-unix.bge-base-en-v1.5.parquet.flat.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-unix.bge-base-en-v1.5.parquet.flat.onnx.yaml new file mode 
100644 index 000000000..04e7c1a67 --- /dev/null +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-unix.bge-base-en-v1.5.parquet.flat.onnx.yaml @@ -0,0 +1,53 @@ +corpus: beir-v1.0.0-cqadupstack-unix.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/cqadupstack-unix.parquet + +index_path: indexes/parquet/cqadupstack-unix +index_type: flat +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: "" + +metrics: +- metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: TsvString +topics: +- name: "BEIR (v1.0.0): CQADupStack-unix" + id: test + path: topics.beir-v1.0.0-cqadupstack-unix.test.tsv.gz + qrel: qrels.beir-v1.0.0-cqadupstack-unix.test.txt + +models: +- name: bge-flat-onnx + display: BGE-base-en-v1.5 + type: flat + params: -generator VectorQueryGenerator -topicField vector -removeQuery -threads + 16 -hits 1000 -encoder BgeBaseEn15 + results: + nDCG@10: + - 0.4219 + R@100: + - 0.7797 + R@1000: + - 0.9237 diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-unix.bge-base-en-v1.5.parquet.hnsw.cached.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-unix.bge-base-en-v1.5.parquet.hnsw.cached.yaml new file mode 100644 index 000000000..7f9d3fadf --- /dev/null +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-unix.bge-base-en-v1.5.parquet.hnsw.cached.yaml @@ -0,0 +1,53 @@ +corpus: beir-v1.0.0-cqadupstack-unix.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/cqadupstack-unix.parquet + +index_path: 
indexes/parquet/cqadupstack-unix +index_type: hnsw +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: -M 16 -efC 100 + +metrics: +- metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: JsonStringVector +topics: +- name: "BEIR (v1.0.0): CQADupStack-unix" + id: test + path: topics.beir-v1.0.0-cqadupstack-unix.test.bge-base-en-v1.5.jsonl.gz + qrel: qrels.beir-v1.0.0-cqadupstack-unix.test.txt + +models: +- name: bge-hnsw-cached + display: BGE-base-en-v1.5 + type: hnsw + params: -generator VectorQueryGenerator -topicField vector -removeQuery -threads + 16 -hits 1000 -efSearch 1000 + results: + nDCG@10: + - 0.4219 + R@100: + - 0.7797 + R@1000: + - 0.9237 diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-unix.bge-base-en-v1.5.parquet.hnsw.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-unix.bge-base-en-v1.5.parquet.hnsw.onnx.yaml new file mode 100644 index 000000000..55beec0f1 --- /dev/null +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-unix.bge-base-en-v1.5.parquet.hnsw.onnx.yaml @@ -0,0 +1,53 @@ +corpus: beir-v1.0.0-cqadupstack-unix.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/cqadupstack-unix.parquet + +index_path: indexes/parquet/cqadupstack-unix +index_type: hnsw +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: -M 16 -efC 100 -memoryBuffer 65536 -noMerge + +metrics: +- metric: nDCG@10 + command: bin/trec_eval + params: -c 
-m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: TsvString +topics: +- name: "BEIR (v1.0.0): CQADupStack-unix" + id: test + path: topics.beir-v1.0.0-cqadupstack-unix.test.tsv.gz + qrel: qrels.beir-v1.0.0-cqadupstack-unix.test.txt + +models: +- name: bge-hnsw-onnx + display: BGE-base-en-v1.5 + type: hnsw + params: -generator VectorQueryGenerator -topicField title -removeQuery -threads + 16 -hits 1000 -efSearch 1000 -encoder BgeBaseEn15 + results: + nDCG@10: + - 0.4219 + R@100: + - 0.7797 + R@1000: + - 0.9237 diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-webmasters.bge-base-en-v1.5.parquet.flat.cached.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-webmasters.bge-base-en-v1.5.parquet.flat.cached.yaml new file mode 100644 index 000000000..fa0d2f7f0 --- /dev/null +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-webmasters.bge-base-en-v1.5.parquet.flat.cached.yaml @@ -0,0 +1,53 @@ +corpus: beir-v1.0.0-cqadupstack-webmasters.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/cqadupstack-webmasters.parquet + +index_path: indexes/parquet/cqadupstack-webmasters +index_type: flat +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: "" + +metrics: +- metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@1000 + command: 
bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: JsonStringVector +topics: +- name: "BEIR (v1.0.0): CQADupStack-webmasters" + id: test + path: topics.beir-v1.0.0-cqadupstack-webmasters.test.bge-base-en-v1.5.jsonl.gz + qrel: qrels.beir-v1.0.0-cqadupstack-webmasters.test.txt + +models: +- name: bge-flat-cached + display: BGE-base-en-v1.5 + type: flat + params: -generator VectorQueryGenerator -topicField vector -removeQuery -threads + 16 -hits 1000 + results: + nDCG@10: + - 0.4065 + R@100: + - 0.7774 + R@1000: + - 0.9380 diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-webmasters.bge-base-en-v1.5.parquet.flat.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-webmasters.bge-base-en-v1.5.parquet.flat.onnx.yaml new file mode 100644 index 000000000..a84c77ec9 --- /dev/null +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-webmasters.bge-base-en-v1.5.parquet.flat.onnx.yaml @@ -0,0 +1,53 @@ +corpus: beir-v1.0.0-cqadupstack-webmasters.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/cqadupstack-webmasters.parquet + +index_path: indexes/parquet/cqadupstack-webmasters +index_type: flat +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: "" + +metrics: +- metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: TsvString +topics: +- name: "BEIR (v1.0.0): CQADupStack-webmasters" + id: test + path: 
topics.beir-v1.0.0-cqadupstack-webmasters.test.tsv.gz + qrel: qrels.beir-v1.0.0-cqadupstack-webmasters.test.txt + +models: +- name: bge-flat-onnx + display: BGE-base-en-v1.5 + type: flat + params: -generator VectorQueryGenerator -topicField vector -removeQuery -threads + 16 -hits 1000 -encoder BgeBaseEn15 + results: + nDCG@10: + - 0.4065 + R@100: + - 0.7774 + R@1000: + - 0.9380 diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-webmasters.bge-base-en-v1.5.parquet.hnsw.cached.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-webmasters.bge-base-en-v1.5.parquet.hnsw.cached.yaml new file mode 100644 index 000000000..a2b59eb90 --- /dev/null +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-webmasters.bge-base-en-v1.5.parquet.hnsw.cached.yaml @@ -0,0 +1,53 @@ +corpus: beir-v1.0.0-cqadupstack-webmasters.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/cqadupstack-webmasters.parquet + +index_path: indexes/parquet/cqadupstack-webmasters +index_type: hnsw +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: -M 16 -efC 100 + +metrics: +- metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: JsonStringVector +topics: +- name: "BEIR (v1.0.0): CQADupStack-webmasters" + id: test + path: topics.beir-v1.0.0-cqadupstack-webmasters.test.bge-base-en-v1.5.jsonl.gz + qrel: qrels.beir-v1.0.0-cqadupstack-webmasters.test.txt + +models: +- name: bge-hnsw-cached + display: BGE-base-en-v1.5 + type: hnsw + params: -generator VectorQueryGenerator 
-topicField vector -removeQuery -threads + 16 -hits 1000 -efSearch 1000 + results: + nDCG@10: + - 0.4065 + R@100: + - 0.7774 + R@1000: + - 0.9380 diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-webmasters.bge-base-en-v1.5.parquet.hnsw.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-webmasters.bge-base-en-v1.5.parquet.hnsw.onnx.yaml new file mode 100644 index 000000000..a13e32715 --- /dev/null +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-webmasters.bge-base-en-v1.5.parquet.hnsw.onnx.yaml @@ -0,0 +1,53 @@ +corpus: beir-v1.0.0-cqadupstack-webmasters.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/cqadupstack-webmasters.parquet + +index_path: indexes/parquet/cqadupstack-webmasters +index_type: hnsw +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: -M 16 -efC 100 -memoryBuffer 65536 -noMerge + +metrics: +- metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: TsvString +topics: +- name: "BEIR (v1.0.0): CQADupStack-webmasters" + id: test + path: topics.beir-v1.0.0-cqadupstack-webmasters.test.tsv.gz + qrel: qrels.beir-v1.0.0-cqadupstack-webmasters.test.txt + +models: +- name: bge-hnsw-onnx + display: BGE-base-en-v1.5 + type: hnsw + params: -generator VectorQueryGenerator -topicField title -removeQuery -threads + 16 -hits 1000 -efSearch 1000 -encoder BgeBaseEn15 + results: + nDCG@10: + - 0.4065 + R@100: + - 0.7774 + R@1000: + - 0.9380 diff --git 
a/src/main/resources/regression/beir-v1.0.0-cqadupstack-wordpress.bge-base-en-v1.5.parquet.flat.cached.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-wordpress.bge-base-en-v1.5.parquet.flat.cached.yaml new file mode 100644 index 000000000..9018e53cd --- /dev/null +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-wordpress.bge-base-en-v1.5.parquet.flat.cached.yaml @@ -0,0 +1,53 @@ +corpus: beir-v1.0.0-cqadupstack-wordpress.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/cqadupstack-wordpress.parquet + +index_path: indexes/parquet/cqadupstack-wordpress +index_type: flat +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: "" + +metrics: +- metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: JsonStringVector +topics: +- name: "BEIR (v1.0.0): CQADupStack-wordpress" + id: test + path: topics.beir-v1.0.0-cqadupstack-wordpress.test.bge-base-en-v1.5.jsonl.gz + qrel: qrels.beir-v1.0.0-cqadupstack-wordpress.test.txt + +models: +- name: bge-flat-cached + display: BGE-base-en-v1.5 + type: flat + params: -generator VectorQueryGenerator -topicField vector -removeQuery -threads + 16 -hits 1000 + results: + nDCG@10: + - 0.3547 + R@100: + - 0.7065 + R@1000: + - 0.8861 diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-wordpress.bge-base-en-v1.5.parquet.flat.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-wordpress.bge-base-en-v1.5.parquet.flat.onnx.yaml new file mode 100644 index 000000000..99ee62e33 --- /dev/null +++ 
b/src/main/resources/regression/beir-v1.0.0-cqadupstack-wordpress.bge-base-en-v1.5.parquet.flat.onnx.yaml @@ -0,0 +1,53 @@ +corpus: beir-v1.0.0-cqadupstack-wordpress.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/cqadupstack-wordpress.parquet + +index_path: indexes/parquet/cqadupstack-wordpress +index_type: flat +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: "" + +metrics: +- metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: TsvString +topics: +- name: "BEIR (v1.0.0): CQADupStack-wordpress" + id: test + path: topics.beir-v1.0.0-cqadupstack-wordpress.test.tsv.gz + qrel: qrels.beir-v1.0.0-cqadupstack-wordpress.test.txt + +models: +- name: bge-flat-onnx + display: BGE-base-en-v1.5 + type: flat + params: -generator VectorQueryGenerator -topicField vector -removeQuery -threads + 16 -hits 1000 -encoder BgeBaseEn15 + results: + nDCG@10: + - 0.3547 + R@100: + - 0.7065 + R@1000: + - 0.8861 diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-wordpress.bge-base-en-v1.5.parquet.hnsw.cached.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-wordpress.bge-base-en-v1.5.parquet.hnsw.cached.yaml new file mode 100644 index 000000000..888acad0a --- /dev/null +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-wordpress.bge-base-en-v1.5.parquet.hnsw.cached.yaml @@ -0,0 +1,53 @@ +corpus: beir-v1.0.0-cqadupstack-wordpress.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/cqadupstack-wordpress.parquet + +index_path: 
indexes/parquet/cqadupstack-wordpress +index_type: hnsw +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: -M 16 -efC 100 + +metrics: +- metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: JsonStringVector +topics: +- name: "BEIR (v1.0.0): CQADupStack-wordpress" + id: test + path: topics.beir-v1.0.0-cqadupstack-wordpress.test.bge-base-en-v1.5.jsonl.gz + qrel: qrels.beir-v1.0.0-cqadupstack-wordpress.test.txt + +models: +- name: bge-hnsw-cached + display: BGE-base-en-v1.5 + type: hnsw + params: -generator VectorQueryGenerator -topicField vector -removeQuery -threads + 16 -hits 1000 -efSearch 1000 + results: + nDCG@10: + - 0.3547 + R@100: + - 0.7065 + R@1000: + - 0.8861 diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-wordpress.bge-base-en-v1.5.parquet.hnsw.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-wordpress.bge-base-en-v1.5.parquet.hnsw.onnx.yaml new file mode 100644 index 000000000..1326c2a8e --- /dev/null +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-wordpress.bge-base-en-v1.5.parquet.hnsw.onnx.yaml @@ -0,0 +1,53 @@ +corpus: beir-v1.0.0-cqadupstack-wordpress.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/cqadupstack-wordpress.parquet + +index_path: indexes/parquet/cqadupstack-wordpress +index_type: hnsw +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: -M 16 -efC 100 -memoryBuffer 65536 -noMerge + +metrics: +- 
metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: TsvString +topics: +- name: "BEIR (v1.0.0): CQADupStack-wordpress" + id: test + path: topics.beir-v1.0.0-cqadupstack-wordpress.test.tsv.gz + qrel: qrels.beir-v1.0.0-cqadupstack-wordpress.test.txt + +models: +- name: bge-hnsw-onnx + display: BGE-base-en-v1.5 + type: hnsw + params: -generator VectorQueryGenerator -topicField title -removeQuery -threads + 16 -hits 1000 -efSearch 1000 -encoder BgeBaseEn15 + results: + nDCG@10: + - 0.3547 + R@100: + - 0.7065 + R@1000: + - 0.8861 diff --git a/src/main/resources/regression/beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5.parquet.flat.cached.yaml b/src/main/resources/regression/beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5.parquet.flat.cached.yaml new file mode 100644 index 000000000..b0ddd8eab --- /dev/null +++ b/src/main/resources/regression/beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5.parquet.flat.cached.yaml @@ -0,0 +1,53 @@ +corpus: beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/dbpedia-entity.parquet + +index_path: indexes/parquet/dbpedia-entity +index_type: flat +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: "" + +metrics: +- metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@1000 + 
command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: JsonStringVector +topics: +- name: "BEIR (v1.0.0): DBPedia" + id: test + path: topics.beir-v1.0.0-dbpedia-entity.test.bge-base-en-v1.5.jsonl.gz + qrel: qrels.beir-v1.0.0-dbpedia-entity.test.txt + +models: +- name: bge-flat-cached + display: BGE-base-en-v1.5 + type: flat + params: -generator VectorQueryGenerator -topicField vector -removeQuery -threads + 16 -hits 1000 + results: + nDCG@10: + - 0.4074 + R@100: + - 0.5303 + R@1000: + - 0.7833 diff --git a/src/main/resources/regression/beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5.parquet.flat.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5.parquet.flat.onnx.yaml new file mode 100644 index 000000000..fab4ba681 --- /dev/null +++ b/src/main/resources/regression/beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5.parquet.flat.onnx.yaml @@ -0,0 +1,53 @@ +corpus: beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/dbpedia-entity.parquet + +index_path: indexes/parquet/dbpedia-entity +index_type: flat +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: "" + +metrics: +- metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: TsvString +topics: +- name: "BEIR (v1.0.0): DBPedia" + id: test + path: topics.beir-v1.0.0-dbpedia-entity.test.tsv.gz + qrel: qrels.beir-v1.0.0-dbpedia-entity.test.txt + +models: +- name: bge-flat-onnx + 
display: BGE-base-en-v1.5 + type: flat + params: -generator VectorQueryGenerator -topicField vector -removeQuery -threads + 16 -hits 1000 -encoder BgeBaseEn15 + results: + nDCG@10: + - 0.4074 + R@100: + - 0.5303 + R@1000: + - 0.7833 diff --git a/src/main/resources/regression/beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5.parquet.hnsw.cached.yaml b/src/main/resources/regression/beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5.parquet.hnsw.cached.yaml new file mode 100644 index 000000000..1b789cb5c --- /dev/null +++ b/src/main/resources/regression/beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5.parquet.hnsw.cached.yaml @@ -0,0 +1,53 @@ +corpus: beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/dbpedia-entity.parquet + +index_path: indexes/parquet/dbpedia-entity +index_type: hnsw +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: -M 16 -efC 100 + +metrics: +- metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: JsonStringVector +topics: +- name: "BEIR (v1.0.0): DBPedia" + id: test + path: topics.beir-v1.0.0-dbpedia-entity.test.bge-base-en-v1.5.jsonl.gz + qrel: qrels.beir-v1.0.0-dbpedia-entity.test.txt + +models: +- name: bge-hnsw-cached + display: BGE-base-en-v1.5 + type: hnsw + params: -generator VectorQueryGenerator -topicField vector -removeQuery -threads + 16 -hits 1000 -efSearch 1000 + results: + nDCG@10: + - 0.4074 + R@100: + - 0.5303 + R@1000: + - 0.7833 diff --git 
a/src/main/resources/regression/beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5.parquet.hnsw.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5.parquet.hnsw.onnx.yaml new file mode 100644 index 000000000..2e16038e4 --- /dev/null +++ b/src/main/resources/regression/beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5.parquet.hnsw.onnx.yaml @@ -0,0 +1,53 @@ +corpus: beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/dbpedia-entity.parquet + +index_path: indexes/parquet/dbpedia-entity +index_type: hnsw +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: -M 16 -efC 100 -memoryBuffer 65536 -noMerge + +metrics: +- metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: TsvString +topics: +- name: "BEIR (v1.0.0): DBPedia" + id: test + path: topics.beir-v1.0.0-dbpedia-entity.test.tsv.gz + qrel: qrels.beir-v1.0.0-dbpedia-entity.test.txt + +models: +- name: bge-hnsw-onnx + display: BGE-base-en-v1.5 + type: hnsw + params: -generator VectorQueryGenerator -topicField title -removeQuery -threads + 16 -hits 1000 -efSearch 1000 -encoder BgeBaseEn15 + results: + nDCG@10: + - 0.4074 + R@100: + - 0.5303 + R@1000: + - 0.7833 diff --git a/src/main/resources/regression/beir-v1.0.0-fever.bge-base-en-v1.5.parquet.flat.cached.yaml b/src/main/resources/regression/beir-v1.0.0-fever.bge-base-en-v1.5.parquet.flat.cached.yaml new file mode 100644 index 000000000..fdf25e2b9 --- /dev/null +++ 
b/src/main/resources/regression/beir-v1.0.0-fever.bge-base-en-v1.5.parquet.flat.cached.yaml @@ -0,0 +1,53 @@ +corpus: beir-v1.0.0-fever.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/fever.parquet + +index_path: indexes/parquet/fever +index_type: flat +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: "" + +metrics: +- metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: JsonStringVector +topics: +- name: "BEIR (v1.0.0): FEVER" + id: test + path: topics.beir-v1.0.0-fever.test.bge-base-en-v1.5.jsonl.gz + qrel: qrels.beir-v1.0.0-fever.test.txt + +models: +- name: bge-flat-cached + display: BGE-base-en-v1.5 + type: flat + params: -generator VectorQueryGenerator -topicField vector -removeQuery -threads + 16 -hits 1000 + results: + nDCG@10: + - 0.8630 + R@100: + - 0.9719 + R@1000: + - 0.9855 diff --git a/src/main/resources/regression/beir-v1.0.0-fever.bge-base-en-v1.5.parquet.flat.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-fever.bge-base-en-v1.5.parquet.flat.onnx.yaml new file mode 100644 index 000000000..c9b0bbb41 --- /dev/null +++ b/src/main/resources/regression/beir-v1.0.0-fever.bge-base-en-v1.5.parquet.flat.onnx.yaml @@ -0,0 +1,53 @@ +corpus: beir-v1.0.0-fever.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/fever.parquet + +index_path: indexes/parquet/fever +index_type: flat +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: "" + +metrics: 
+- metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: TsvString +topics: +- name: "BEIR (v1.0.0): FEVER" + id: test + path: topics.beir-v1.0.0-fever.test.tsv.gz + qrel: qrels.beir-v1.0.0-fever.test.txt + +models: +- name: bge-flat-onnx + display: BGE-base-en-v1.5 + type: flat + params: -generator VectorQueryGenerator -topicField vector -removeQuery -threads + 16 -hits 1000 -encoder BgeBaseEn15 + results: + nDCG@10: + - 0.8630 + R@100: + - 0.9719 + R@1000: + - 0.9855 diff --git a/src/main/resources/regression/beir-v1.0.0-fever.bge-base-en-v1.5.parquet.hnsw.cached.yaml b/src/main/resources/regression/beir-v1.0.0-fever.bge-base-en-v1.5.parquet.hnsw.cached.yaml new file mode 100644 index 000000000..29c01d6b4 --- /dev/null +++ b/src/main/resources/regression/beir-v1.0.0-fever.bge-base-en-v1.5.parquet.hnsw.cached.yaml @@ -0,0 +1,53 @@ +corpus: beir-v1.0.0-fever.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/fever.parquet + +index_path: indexes/parquet/fever +index_type: hnsw +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: -M 16 -efC 100 + +metrics: +- metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + 
metric_precision: 4 + can_combine: false + +topic_reader: JsonStringVector +topics: +- name: "BEIR (v1.0.0): FEVER" + id: test + path: topics.beir-v1.0.0-fever.test.bge-base-en-v1.5.jsonl.gz + qrel: qrels.beir-v1.0.0-fever.test.txt + +models: +- name: bge-hnsw-cached + display: BGE-base-en-v1.5 + type: hnsw + params: -generator VectorQueryGenerator -topicField vector -removeQuery -threads + 16 -hits 1000 -efSearch 1000 + results: + nDCG@10: + - 0.8630 + R@100: + - 0.9719 + R@1000: + - 0.9855 diff --git a/src/main/resources/regression/beir-v1.0.0-fever.bge-base-en-v1.5.parquet.hnsw.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-fever.bge-base-en-v1.5.parquet.hnsw.onnx.yaml new file mode 100644 index 000000000..1339bfd65 --- /dev/null +++ b/src/main/resources/regression/beir-v1.0.0-fever.bge-base-en-v1.5.parquet.hnsw.onnx.yaml @@ -0,0 +1,53 @@ +corpus: beir-v1.0.0-fever.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/fever.parquet + +index_path: indexes/parquet/fever +index_type: hnsw +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: -M 16 -efC 100 -memoryBuffer 65536 -noMerge + +metrics: +- metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: TsvString +topics: +- name: "BEIR (v1.0.0): FEVER" + id: test + path: topics.beir-v1.0.0-fever.test.tsv.gz + qrel: qrels.beir-v1.0.0-fever.test.txt + +models: +- name: bge-hnsw-onnx + display: BGE-base-en-v1.5 + type: hnsw + params: -generator VectorQueryGenerator -topicField title -removeQuery -threads + 16 
-hits 1000 -efSearch 1000 -encoder BgeBaseEn15 + results: + nDCG@10: + - 0.8630 + R@100: + - 0.9719 + R@1000: + - 0.9855 diff --git a/src/main/resources/regression/beir-v1.0.0-fiqa.bge-base-en-v1.5.parquet.flat.cached.yaml b/src/main/resources/regression/beir-v1.0.0-fiqa.bge-base-en-v1.5.parquet.flat.cached.yaml new file mode 100644 index 000000000..33060e19a --- /dev/null +++ b/src/main/resources/regression/beir-v1.0.0-fiqa.bge-base-en-v1.5.parquet.flat.cached.yaml @@ -0,0 +1,53 @@ +corpus: beir-v1.0.0-fiqa.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/fiqa.parquet + +index_path: indexes/parquet/fiqa +index_type: flat +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: "" + +metrics: +- metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: JsonStringVector +topics: +- name: "BEIR (v1.0.0): FiQA-2018" + id: test + path: topics.beir-v1.0.0-fiqa.test.bge-base-en-v1.5.jsonl.gz + qrel: qrels.beir-v1.0.0-fiqa.test.txt + +models: +- name: bge-flat-cached + display: BGE-base-en-v1.5 + type: flat + params: -generator VectorQueryGenerator -topicField vector -removeQuery -threads + 16 -hits 1000 + results: + nDCG@10: + - 0.4065 + R@100: + - 0.7415 + R@1000: + - 0.9083 diff --git a/src/main/resources/regression/beir-v1.0.0-fiqa.bge-base-en-v1.5.parquet.flat.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-fiqa.bge-base-en-v1.5.parquet.flat.onnx.yaml new file mode 100644 index 000000000..ae8e00c96 --- /dev/null +++ 
b/src/main/resources/regression/beir-v1.0.0-fiqa.bge-base-en-v1.5.parquet.flat.onnx.yaml @@ -0,0 +1,53 @@ +corpus: beir-v1.0.0-fiqa.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/fiqa.parquet + +index_path: indexes/parquet/fiqa +index_type: flat +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: "" + +metrics: +- metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: TsvString +topics: +- name: "BEIR (v1.0.0): FiQA-2018" + id: test + path: topics.beir-v1.0.0-fiqa.test.tsv.gz + qrel: qrels.beir-v1.0.0-fiqa.test.txt + +models: +- name: bge-flat-onnx + display: BGE-base-en-v1.5 + type: flat + params: -generator VectorQueryGenerator -topicField title -removeQuery -threads + 16 -hits 1000 -encoder BgeBaseEn15 + results: + nDCG@10: + - 0.4065 + R@100: + - 0.7415 + R@1000: + - 0.9083 diff --git a/src/main/resources/regression/beir-v1.0.0-fiqa.bge-base-en-v1.5.parquet.hnsw.cached.yaml b/src/main/resources/regression/beir-v1.0.0-fiqa.bge-base-en-v1.5.parquet.hnsw.cached.yaml new file mode 100644 index 000000000..421a0bdf4 --- /dev/null +++ b/src/main/resources/regression/beir-v1.0.0-fiqa.bge-base-en-v1.5.parquet.hnsw.cached.yaml @@ -0,0 +1,53 @@ +corpus: beir-v1.0.0-fiqa.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/fiqa.parquet + +index_path: indexes/parquet/fiqa +index_type: hnsw +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: -M 16 -efC 100 +
+metrics: +- metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: JsonStringVector +topics: +- name: "BEIR (v1.0.0): FiQA-2018" + id: test + path: topics.beir-v1.0.0-fiqa.test.bge-base-en-v1.5.jsonl.gz + qrel: qrels.beir-v1.0.0-fiqa.test.txt + +models: +- name: bge-hnsw-cached + display: BGE-base-en-v1.5 + type: hnsw + params: -generator VectorQueryGenerator -topicField vector -removeQuery -threads + 16 -hits 1000 -efSearch 1000 + results: + nDCG@10: + - 0.4065 + R@100: + - 0.7415 + R@1000: + - 0.9083 diff --git a/src/main/resources/regression/beir-v1.0.0-fiqa.bge-base-en-v1.5.parquet.hnsw.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-fiqa.bge-base-en-v1.5.parquet.hnsw.onnx.yaml new file mode 100644 index 000000000..a8755f7cb --- /dev/null +++ b/src/main/resources/regression/beir-v1.0.0-fiqa.bge-base-en-v1.5.parquet.hnsw.onnx.yaml @@ -0,0 +1,53 @@ +corpus: beir-v1.0.0-fiqa.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/fiqa.parquet + +index_path: indexes/parquet/fiqa +index_type: hnsw +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: -M 16 -efC 100 -memoryBuffer 65536 -noMerge + +metrics: +- metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@1000 + command: bin/trec_eval + params: -c -m 
recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: TsvString +topics: +- name: "BEIR (v1.0.0): FiQA-2018" + id: test + path: topics.beir-v1.0.0-fiqa.test.tsv.gz + qrel: qrels.beir-v1.0.0-fiqa.test.txt + +models: +- name: bge-hnsw-onnx + display: BGE-base-en-v1.5 + type: hnsw + params: -generator VectorQueryGenerator -topicField title -removeQuery -threads + 16 -hits 1000 -efSearch 1000 -encoder BgeBaseEn15 + results: + nDCG@10: + - 0.4065 + R@100: + - 0.7415 + R@1000: + - 0.9083 diff --git a/src/main/resources/regression/beir-v1.0.0-hotpotqa.bge-base-en-v1.5.parquet.flat.cached.yaml b/src/main/resources/regression/beir-v1.0.0-hotpotqa.bge-base-en-v1.5.parquet.flat.cached.yaml new file mode 100644 index 000000000..7ea4a5336 --- /dev/null +++ b/src/main/resources/regression/beir-v1.0.0-hotpotqa.bge-base-en-v1.5.parquet.flat.cached.yaml @@ -0,0 +1,53 @@ +corpus: beir-v1.0.0-hotpotqa.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/hotpotqa.parquet + +index_path: indexes/parquet/hotpotqa +index_type: flat +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: "" + +metrics: +- metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: JsonStringVector +topics: +- name: "BEIR (v1.0.0): HotpotQA" + id: test + path: topics.beir-v1.0.0-hotpotqa.test.bge-base-en-v1.5.jsonl.gz + qrel: qrels.beir-v1.0.0-hotpotqa.test.txt + +models: +- name: bge-flat-cached + display: BGE-base-en-v1.5 + type: flat + params: -generator 
VectorQueryGenerator -topicField vector -removeQuery -threads + 16 -hits 1000 + results: + nDCG@10: + - 0.7259 + R@100: + - 0.8727 + R@1000: + - 0.9424 diff --git a/src/main/resources/regression/beir-v1.0.0-hotpotqa.bge-base-en-v1.5.parquet.flat.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-hotpotqa.bge-base-en-v1.5.parquet.flat.onnx.yaml new file mode 100644 index 000000000..629ecf212 --- /dev/null +++ b/src/main/resources/regression/beir-v1.0.0-hotpotqa.bge-base-en-v1.5.parquet.flat.onnx.yaml @@ -0,0 +1,53 @@ +corpus: beir-v1.0.0-hotpotqa.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/hotpotqa.parquet + +index_path: indexes/parquet/hotpotqa +index_type: flat +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: "" + +metrics: +- metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: TsvString +topics: +- name: "BEIR (v1.0.0): HotpotQA" + id: test + path: topics.beir-v1.0.0-hotpotqa.test.tsv.gz + qrel: qrels.beir-v1.0.0-hotpotqa.test.txt + +models: +- name: bge-flat-onnx + display: BGE-base-en-v1.5 + type: flat + params: -generator VectorQueryGenerator -topicField title -removeQuery -threads + 16 -hits 1000 -encoder BgeBaseEn15 + results: + nDCG@10: + - 0.7259 + R@100: + - 0.8727 + R@1000: + - 0.9424 diff --git a/src/main/resources/regression/beir-v1.0.0-hotpotqa.bge-base-en-v1.5.parquet.hnsw.cached.yaml b/src/main/resources/regression/beir-v1.0.0-hotpotqa.bge-base-en-v1.5.parquet.hnsw.cached.yaml new file mode 100644 index 000000000..a4a906ffd ---
/dev/null +++ b/src/main/resources/regression/beir-v1.0.0-hotpotqa.bge-base-en-v1.5.parquet.hnsw.cached.yaml @@ -0,0 +1,53 @@ +corpus: beir-v1.0.0-hotpotqa.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/hotpotqa.parquet + +index_path: indexes/parquet/hotpotqa +index_type: hnsw +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: -M 16 -efC 100 + +metrics: +- metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: JsonStringVector +topics: +- name: "BEIR (v1.0.0): HotpotQA" + id: test + path: topics.beir-v1.0.0-hotpotqa.test.bge-base-en-v1.5.jsonl.gz + qrel: qrels.beir-v1.0.0-hotpotqa.test.txt + +models: +- name: bge-hnsw-cached + display: BGE-base-en-v1.5 + type: hnsw + params: -generator VectorQueryGenerator -topicField vector -removeQuery -threads + 16 -hits 1000 -efSearch 1000 + results: + nDCG@10: + - 0.7259 + R@100: + - 0.8727 + R@1000: + - 0.9424 diff --git a/src/main/resources/regression/beir-v1.0.0-hotpotqa.bge-base-en-v1.5.parquet.hnsw.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-hotpotqa.bge-base-en-v1.5.parquet.hnsw.onnx.yaml new file mode 100644 index 000000000..682caa635 --- /dev/null +++ b/src/main/resources/regression/beir-v1.0.0-hotpotqa.bge-base-en-v1.5.parquet.hnsw.onnx.yaml @@ -0,0 +1,53 @@ +corpus: beir-v1.0.0-hotpotqa.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/hotpotqa.parquet + +index_path: indexes/parquet/hotpotqa +index_type: hnsw +collection_class: ParquetDenseVectorCollection +generator_class: 
ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: -M 16 -efC 100 -memoryBuffer 65536 -noMerge + +metrics: +- metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: TsvString +topics: +- name: "BEIR (v1.0.0): HotpotQA" + id: test + path: topics.beir-v1.0.0-hotpotqa.test.tsv.gz + qrel: qrels.beir-v1.0.0-hotpotqa.test.txt + +models: +- name: bge-hnsw-onnx + display: BGE-base-en-v1.5 + type: hnsw + params: -generator VectorQueryGenerator -topicField title -removeQuery -threads + 16 -hits 1000 -efSearch 1000 -encoder BgeBaseEn15 + results: + nDCG@10: + - 0.7259 + R@100: + - 0.8727 + R@1000: + - 0.9424 diff --git a/src/main/resources/regression/beir-v1.0.0-nfcorpus.bge-base-en-v1.5.parquet.flat.cached.yaml b/src/main/resources/regression/beir-v1.0.0-nfcorpus.bge-base-en-v1.5.parquet.flat.cached.yaml new file mode 100644 index 000000000..246e9fb66 --- /dev/null +++ b/src/main/resources/regression/beir-v1.0.0-nfcorpus.bge-base-en-v1.5.parquet.flat.cached.yaml @@ -0,0 +1,53 @@ +corpus: beir-v1.0.0-nfcorpus.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/nfcorpus.parquet + +index_path: indexes/parquet/nfcorpus +index_type: flat +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: "" + +metrics: +- metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + 
metric_precision: 4 + can_combine: false +- metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: JsonStringVector +topics: +- name: "BEIR (v1.0.0): NFCorpus" + id: test + path: topics.beir-v1.0.0-nfcorpus.test.bge-base-en-v1.5.jsonl.gz + qrel: qrels.beir-v1.0.0-nfcorpus.test.txt + +models: +- name: bge-flat-cached + display: BGE-base-en-v1.5 + type: flat + params: -generator VectorQueryGenerator -topicField vector -removeQuery -threads + 16 -hits 1000 + results: + nDCG@10: + - 0.3735 + R@100: + - 0.3368 + R@1000: + - 0.6622 diff --git a/src/main/resources/regression/beir-v1.0.0-nfcorpus.bge-base-en-v1.5.parquet.flat.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-nfcorpus.bge-base-en-v1.5.parquet.flat.onnx.yaml new file mode 100644 index 000000000..ef1780348 --- /dev/null +++ b/src/main/resources/regression/beir-v1.0.0-nfcorpus.bge-base-en-v1.5.parquet.flat.onnx.yaml @@ -0,0 +1,53 @@ +corpus: beir-v1.0.0-nfcorpus.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/nfcorpus.parquet + +index_path: indexes/parquet/nfcorpus +index_type: flat +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: "" + +metrics: +- metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: TsvString +topics: +- name: "BEIR (v1.0.0): NFCorpus" + id: test + path: topics.beir-v1.0.0-nfcorpus.test.tsv.gz + qrel: qrels.beir-v1.0.0-nfcorpus.test.txt + +models: +- name: bge-flat-onnx + 
display: BGE-base-en-v1.5 + type: flat + params: -generator VectorQueryGenerator -topicField title -removeQuery -threads + 16 -hits 1000 -encoder BgeBaseEn15 + results: + nDCG@10: + - 0.3735 + R@100: + - 0.3368 + R@1000: + - 0.6622 diff --git a/src/main/resources/regression/beir-v1.0.0-nfcorpus.bge-base-en-v1.5.parquet.hnsw.cached.yaml b/src/main/resources/regression/beir-v1.0.0-nfcorpus.bge-base-en-v1.5.parquet.hnsw.cached.yaml new file mode 100644 index 000000000..6c40a1d46 --- /dev/null +++ b/src/main/resources/regression/beir-v1.0.0-nfcorpus.bge-base-en-v1.5.parquet.hnsw.cached.yaml @@ -0,0 +1,53 @@ +corpus: beir-v1.0.0-nfcorpus.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/nfcorpus.parquet + +index_path: indexes/parquet/nfcorpus +index_type: hnsw +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: -M 16 -efC 100 + +metrics: +- metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: JsonStringVector +topics: +- name: "BEIR (v1.0.0): NFCorpus" + id: test + path: topics.beir-v1.0.0-nfcorpus.test.bge-base-en-v1.5.jsonl.gz + qrel: qrels.beir-v1.0.0-nfcorpus.test.txt + +models: +- name: bge-hnsw-cached + display: BGE-base-en-v1.5 + type: hnsw + params: -generator VectorQueryGenerator -topicField vector -removeQuery -threads + 16 -hits 1000 -efSearch 1000 + results: + nDCG@10: + - 0.3735 + R@100: + - 0.3368 + R@1000: + - 0.6622 diff --git a/src/main/resources/regression/beir-v1.0.0-nfcorpus.bge-base-en-v1.5.parquet.hnsw.onnx.yaml
b/src/main/resources/regression/beir-v1.0.0-nfcorpus.bge-base-en-v1.5.parquet.hnsw.onnx.yaml new file mode 100644 index 000000000..3b8ba633c --- /dev/null +++ b/src/main/resources/regression/beir-v1.0.0-nfcorpus.bge-base-en-v1.5.parquet.hnsw.onnx.yaml @@ -0,0 +1,53 @@ +corpus: beir-v1.0.0-nfcorpus.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/nfcorpus.parquet + +index_path: indexes/parquet/nfcorpus +index_type: hnsw +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: -M 16 -efC 100 -memoryBuffer 65536 -noMerge + +metrics: +- metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: TsvString +topics: +- name: "BEIR (v1.0.0): NFCorpus" + id: test + path: topics.beir-v1.0.0-nfcorpus.test.tsv.gz + qrel: qrels.beir-v1.0.0-nfcorpus.test.txt + +models: +- name: bge-hnsw-onnx + display: BGE-base-en-v1.5 + type: hnsw + params: -generator VectorQueryGenerator -topicField title -removeQuery -threads + 16 -hits 1000 -efSearch 1000 -encoder BgeBaseEn15 + results: + nDCG@10: + - 0.3735 + R@100: + - 0.3368 + R@1000: + - 0.6622 diff --git a/src/main/resources/regression/beir-v1.0.0-nq.bge-base-en-v1.5.parquet.flat.cached.yaml b/src/main/resources/regression/beir-v1.0.0-nq.bge-base-en-v1.5.parquet.flat.cached.yaml new file mode 100644 index 000000000..52e4df12a --- /dev/null +++ b/src/main/resources/regression/beir-v1.0.0-nq.bge-base-en-v1.5.parquet.flat.cached.yaml @@ -0,0 +1,53 @@ +corpus: beir-v1.0.0-nq.bge-base-en-v1.5 +corpus_path: 
collections/beir-v1.0.0/bge-base-en-v1.5/nq.parquet + +index_path: indexes/parquet/nq +index_type: flat +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: "" + +metrics: +- metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: JsonStringVector +topics: +- name: "BEIR (v1.0.0): NQ" + id: test + path: topics.beir-v1.0.0-nq.test.bge-base-en-v1.5.jsonl.gz + qrel: qrels.beir-v1.0.0-nq.test.txt + +models: +- name: bge-flat-cached + display: BGE-base-en-v1.5 + type: flat + params: -generator VectorQueryGenerator -topicField vector -removeQuery -threads + 16 -hits 1000 + results: + nDCG@10: + - 0.5413 + R@100: + - 0.9415 + R@1000: + - 0.9859 diff --git a/src/main/resources/regression/beir-v1.0.0-nq.bge-base-en-v1.5.parquet.flat.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-nq.bge-base-en-v1.5.parquet.flat.onnx.yaml new file mode 100644 index 000000000..8dc50a42d --- /dev/null +++ b/src/main/resources/regression/beir-v1.0.0-nq.bge-base-en-v1.5.parquet.flat.onnx.yaml @@ -0,0 +1,53 @@ +corpus: beir-v1.0.0-nq.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/nq.parquet + +index_path: indexes/parquet/nq +index_type: flat +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: "" + +metrics: +- metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@100 + command: bin/trec_eval + 
params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: TsvString +topics: +- name: "BEIR (v1.0.0): NQ" + id: test + path: topics.beir-v1.0.0-nq.test.tsv.gz + qrel: qrels.beir-v1.0.0-nq.test.txt + +models: +- name: bge-flat-onnx + display: BGE-base-en-v1.5 + type: flat + params: -generator VectorQueryGenerator -topicField title -removeQuery -threads + 16 -hits 1000 -encoder BgeBaseEn15 + results: + nDCG@10: + - 0.5413 + R@100: + - 0.9415 + R@1000: + - 0.9859 diff --git a/src/main/resources/regression/beir-v1.0.0-nq.bge-base-en-v1.5.parquet.hnsw.cached.yaml b/src/main/resources/regression/beir-v1.0.0-nq.bge-base-en-v1.5.parquet.hnsw.cached.yaml new file mode 100644 index 000000000..4733708f4 --- /dev/null +++ b/src/main/resources/regression/beir-v1.0.0-nq.bge-base-en-v1.5.parquet.hnsw.cached.yaml @@ -0,0 +1,53 @@ +corpus: beir-v1.0.0-nq.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/nq.parquet + +index_path: indexes/parquet/nq +index_type: hnsw +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: -M 16 -efC 100 + +metrics: +- metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: JsonStringVector +topics: +- name: "BEIR (v1.0.0): NQ" + id: test + path: topics.beir-v1.0.0-nq.test.bge-base-en-v1.5.jsonl.gz + qrel: qrels.beir-v1.0.0-nq.test.txt + +models:
+- name: bge-hnsw-cached + display: BGE-base-en-v1.5 + type: hnsw + params: -generator VectorQueryGenerator -topicField vector -removeQuery -threads + 16 -hits 1000 -efSearch 1000 + results: + nDCG@10: + - 0.5413 + R@100: + - 0.9415 + R@1000: + - 0.9859 diff --git a/src/main/resources/regression/beir-v1.0.0-nq.bge-base-en-v1.5.parquet.hnsw.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-nq.bge-base-en-v1.5.parquet.hnsw.onnx.yaml new file mode 100644 index 000000000..2d6b75f9f --- /dev/null +++ b/src/main/resources/regression/beir-v1.0.0-nq.bge-base-en-v1.5.parquet.hnsw.onnx.yaml @@ -0,0 +1,53 @@ +corpus: beir-v1.0.0-nq.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/nq.parquet + +index_path: indexes/parquet/nq +index_type: hnsw +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: -M 16 -efC 100 -memoryBuffer 65536 -noMerge + +metrics: +- metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: TsvString +topics: +- name: "BEIR (v1.0.0): NQ" + id: test + path: topics.beir-v1.0.0-nq.test.tsv.gz + qrel: qrels.beir-v1.0.0-nq.test.txt + +models: +- name: bge-hnsw-onnx + display: BGE-base-en-v1.5 + type: hnsw + params: -generator VectorQueryGenerator -topicField title -removeQuery -threads + 16 -hits 1000 -efSearch 1000 -encoder BgeBaseEn15 + results: + nDCG@10: + - 0.5413 + R@100: + - 0.9415 + R@1000: + - 0.9859 diff --git a/src/main/resources/regression/beir-v1.0.0-quora.bge-base-en-v1.5.parquet.flat.cached.yaml 
b/src/main/resources/regression/beir-v1.0.0-quora.bge-base-en-v1.5.parquet.flat.cached.yaml new file mode 100644 index 000000000..cfd995c55 --- /dev/null +++ b/src/main/resources/regression/beir-v1.0.0-quora.bge-base-en-v1.5.parquet.flat.cached.yaml @@ -0,0 +1,53 @@ +corpus: beir-v1.0.0-quora.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/quora.parquet + +index_path: indexes/parquet/quora +index_type: flat +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: "" + +metrics: +- metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: JsonStringVector +topics: +- name: "BEIR (v1.0.0): Quora" + id: test + path: topics.beir-v1.0.0-quora.test.bge-base-en-v1.5.jsonl.gz + qrel: qrels.beir-v1.0.0-quora.test.txt + +models: +- name: bge-flat-cached + display: BGE-base-en-v1.5 + type: flat + params: -generator VectorQueryGenerator -topicField vector -removeQuery -threads + 16 -hits 1000 + results: + nDCG@10: + - 0.8890 + R@100: + - 0.9967 + R@1000: + - 0.9998 diff --git a/src/main/resources/regression/beir-v1.0.0-quora.bge-base-en-v1.5.parquet.flat.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-quora.bge-base-en-v1.5.parquet.flat.onnx.yaml new file mode 100644 index 000000000..ac5a75722 --- /dev/null +++ b/src/main/resources/regression/beir-v1.0.0-quora.bge-base-en-v1.5.parquet.flat.onnx.yaml @@ -0,0 +1,53 @@ +corpus: beir-v1.0.0-quora.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/quora.parquet + +index_path: indexes/parquet/quora 
+index_type: flat +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: "" + +metrics: +- metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: TsvString +topics: +- name: "BEIR (v1.0.0): Quora" + id: test + path: topics.beir-v1.0.0-quora.test.tsv.gz + qrel: qrels.beir-v1.0.0-quora.test.txt + +models: +- name: bge-flat-onnx + display: BGE-base-en-v1.5 + type: flat + params: -generator VectorQueryGenerator -topicField title -removeQuery -threads + 16 -hits 1000 -encoder BgeBaseEn15 + results: + nDCG@10: + - 0.8890 + R@100: + - 0.9967 + R@1000: + - 0.9998 diff --git a/src/main/resources/regression/beir-v1.0.0-quora.bge-base-en-v1.5.parquet.hnsw.cached.yaml b/src/main/resources/regression/beir-v1.0.0-quora.bge-base-en-v1.5.parquet.hnsw.cached.yaml new file mode 100644 index 000000000..6b5a3a716 --- /dev/null +++ b/src/main/resources/regression/beir-v1.0.0-quora.bge-base-en-v1.5.parquet.hnsw.cached.yaml @@ -0,0 +1,53 @@ +corpus: beir-v1.0.0-quora.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/quora.parquet + +index_path: indexes/parquet/quora +index_type: hnsw +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: -M 16 -efC 100 + +metrics: +- metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" +
parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: JsonStringVector +topics: +- name: "BEIR (v1.0.0): Quora" + id: test + path: topics.beir-v1.0.0-quora.test.bge-base-en-v1.5.jsonl.gz + qrel: qrels.beir-v1.0.0-quora.test.txt + +models: +- name: bge-hnsw-cached + display: BGE-base-en-v1.5 + type: hnsw + params: -generator VectorQueryGenerator -topicField vector -removeQuery -threads + 16 -hits 1000 -efSearch 1000 + results: + nDCG@10: + - 0.8890 + R@100: + - 0.9967 + R@1000: + - 0.9998 diff --git a/src/main/resources/regression/beir-v1.0.0-quora.bge-base-en-v1.5.parquet.hnsw.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-quora.bge-base-en-v1.5.parquet.hnsw.onnx.yaml new file mode 100644 index 000000000..6d8eb9b5d --- /dev/null +++ b/src/main/resources/regression/beir-v1.0.0-quora.bge-base-en-v1.5.parquet.hnsw.onnx.yaml @@ -0,0 +1,53 @@ +corpus: beir-v1.0.0-quora.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/quora.parquet + +index_path: indexes/parquet/quora +index_type: hnsw +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: -M 16 -efC 100 -memoryBuffer 65536 -noMerge + +metrics: +- metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: TsvString +topics: +- name: "BEIR (v1.0.0): Quora" + id: test + path: topics.beir-v1.0.0-quora.test.tsv.gz + qrel: qrels.beir-v1.0.0-quora.test.txt 
+ +models: +- name: bge-hnsw-onnx + display: BGE-base-en-v1.5 + type: hnsw + params: -generator VectorQueryGenerator -topicField title -removeQuery -threads + 16 -hits 1000 -efSearch 1000 -encoder BgeBaseEn15 + results: + nDCG@10: + - 0.8890 + R@100: + - 0.9967 + R@1000: + - 0.9998 diff --git a/src/main/resources/regression/beir-v1.0.0-robust04.bge-base-en-v1.5.parquet.flat.cached.yaml b/src/main/resources/regression/beir-v1.0.0-robust04.bge-base-en-v1.5.parquet.flat.cached.yaml new file mode 100644 index 000000000..f25d23e3a --- /dev/null +++ b/src/main/resources/regression/beir-v1.0.0-robust04.bge-base-en-v1.5.parquet.flat.cached.yaml @@ -0,0 +1,53 @@ +corpus: beir-v1.0.0-robust04.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/robust04.parquet + +index_path: indexes/parquet/robust04 +index_type: flat +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: "" + +metrics: +- metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: JsonStringVector +topics: +- name: "BEIR (v1.0.0): Robust04" + id: test + path: topics.beir-v1.0.0-robust04.test.bge-base-en-v1.5.jsonl.gz + qrel: qrels.beir-v1.0.0-robust04.test.txt + +models: +- name: bge-flat-cached + display: BGE-base-en-v1.5 + type: flat + params: -generator VectorQueryGenerator -topicField vector -removeQuery -threads + 16 -hits 1000 + results: + nDCG@10: + - 0.4465 + R@100: + - 0.3507 + R@1000: + - 0.5981 diff --git a/src/main/resources/regression/beir-v1.0.0-robust04.bge-base-en-v1.5.parquet.flat.onnx.yaml 
b/src/main/resources/regression/beir-v1.0.0-robust04.bge-base-en-v1.5.parquet.flat.onnx.yaml new file mode 100644 index 000000000..37dcf3b83 --- /dev/null +++ b/src/main/resources/regression/beir-v1.0.0-robust04.bge-base-en-v1.5.parquet.flat.onnx.yaml @@ -0,0 +1,53 @@ +corpus: beir-v1.0.0-robust04.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/robust04.parquet + +index_path: indexes/parquet/robust04 +index_type: flat +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: "" + +metrics: +- metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: TsvString +topics: +- name: "BEIR (v1.0.0): Robust04" + id: test + path: topics.beir-v1.0.0-robust04.test.tsv.gz + qrel: qrels.beir-v1.0.0-robust04.test.txt + +models: +- name: bge-flat-onnx + display: BGE-base-en-v1.5 + type: flat + params: -generator VectorQueryGenerator -topicField vector -removeQuery -threads + 16 -hits 1000 -encoder BgeBaseEn15 + results: + nDCG@10: + - 0.4465 + R@100: + - 0.3507 + R@1000: + - 0.5981 diff --git a/src/main/resources/regression/beir-v1.0.0-robust04.bge-base-en-v1.5.parquet.hnsw.cached.yaml b/src/main/resources/regression/beir-v1.0.0-robust04.bge-base-en-v1.5.parquet.hnsw.cached.yaml new file mode 100644 index 000000000..975db8d44 --- /dev/null +++ b/src/main/resources/regression/beir-v1.0.0-robust04.bge-base-en-v1.5.parquet.hnsw.cached.yaml @@ -0,0 +1,53 @@ +corpus: beir-v1.0.0-robust04.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/robust04.parquet + +index_path: 
indexes/parquet/robust04 +index_type: hnsw +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: -M 16 -efC 100 + +metrics: +- metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: JsonStringVector +topics: +- name: "BEIR (v1.0.0): Robust04" + id: test + path: topics.beir-v1.0.0-robust04.test.bge-base-en-v1.5.jsonl.gz + qrel: qrels.beir-v1.0.0-robust04.test.txt + +models: +- name: bge-hnsw-cached + display: BGE-base-en-v1.5 + type: hnsw + params: -generator VectorQueryGenerator -topicField vector -removeQuery -threads + 16 -hits 1000 -efSearch 1000 + results: + nDCG@10: + - 0.4465 + R@100: + - 0.3507 + R@1000: + - 0.5981 diff --git a/src/main/resources/regression/beir-v1.0.0-robust04.bge-base-en-v1.5.parquet.hnsw.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-robust04.bge-base-en-v1.5.parquet.hnsw.onnx.yaml new file mode 100644 index 000000000..0b7a0507d --- /dev/null +++ b/src/main/resources/regression/beir-v1.0.0-robust04.bge-base-en-v1.5.parquet.hnsw.onnx.yaml @@ -0,0 +1,53 @@ +corpus: beir-v1.0.0-robust04.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/robust04.parquet + +index_path: indexes/parquet/robust04 +index_type: hnsw +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: -M 16 -efC 100 -memoryBuffer 65536 -noMerge + +metrics: +- metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + 
can_combine: false +- metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: TsvString +topics: +- name: "BEIR (v1.0.0): Robust04" + id: test + path: topics.beir-v1.0.0-robust04.test.tsv.gz + qrel: qrels.beir-v1.0.0-robust04.test.txt + +models: +- name: bge-hnsw-onnx + display: BGE-base-en-v1.5 + type: hnsw + params: -generator VectorQueryGenerator -topicField title -removeQuery -threads + 16 -hits 1000 -efSearch 1000 -encoder BgeBaseEn15 + results: + nDCG@10: + - 0.4465 + R@100: + - 0.3507 + R@1000: + - 0.5981 diff --git a/src/main/resources/regression/beir-v1.0.0-scidocs.bge-base-en-v1.5.parquet.flat.cached.yaml b/src/main/resources/regression/beir-v1.0.0-scidocs.bge-base-en-v1.5.parquet.flat.cached.yaml new file mode 100644 index 000000000..fbbd7cf60 --- /dev/null +++ b/src/main/resources/regression/beir-v1.0.0-scidocs.bge-base-en-v1.5.parquet.flat.cached.yaml @@ -0,0 +1,53 @@ +corpus: beir-v1.0.0-scidocs.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/scidocs.parquet + +index_path: indexes/parquet/scidocs +index_type: flat +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: "" + +metrics: +- metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: JsonStringVector +topics: +- name: "BEIR (v1.0.0): SCIDOCS" + 
id: test + path: topics.beir-v1.0.0-scidocs.test.bge-base-en-v1.5.jsonl.gz + qrel: qrels.beir-v1.0.0-scidocs.test.txt + +models: +- name: bge-flat-cached + display: BGE-base-en-v1.5 + type: flat + params: -generator VectorQueryGenerator -topicField vector -removeQuery -threads + 16 -hits 1000 + results: + nDCG@10: + - 0.2170 + R@100: + - 0.4959 + R@1000: + - 0.7824 diff --git a/src/main/resources/regression/beir-v1.0.0-scidocs.bge-base-en-v1.5.parquet.flat.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-scidocs.bge-base-en-v1.5.parquet.flat.onnx.yaml new file mode 100644 index 000000000..953d845c4 --- /dev/null +++ b/src/main/resources/regression/beir-v1.0.0-scidocs.bge-base-en-v1.5.parquet.flat.onnx.yaml @@ -0,0 +1,53 @@ +corpus: beir-v1.0.0-scidocs.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/scidocs.parquet + +index_path: indexes/parquet/scidocs +index_type: flat +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: "" + +metrics: +- metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: TsvString +topics: +- name: "BEIR (v1.0.0): SCIDOCS" + id: test + path: topics.beir-v1.0.0-scidocs.test.tsv.gz + qrel: qrels.beir-v1.0.0-scidocs.test.txt + +models: +- name: bge-flat-onnx + display: BGE-base-en-v1.5 + type: flat + params: -generator VectorQueryGenerator -topicField vector -removeQuery -threads + 16 -hits 1000 -encoder BgeBaseEn15 + results: + nDCG@10: + - 0.2170 + R@100: + - 0.4959 + R@1000: + - 0.7824 diff --git 
a/src/main/resources/regression/beir-v1.0.0-scidocs.bge-base-en-v1.5.parquet.hnsw.cached.yaml b/src/main/resources/regression/beir-v1.0.0-scidocs.bge-base-en-v1.5.parquet.hnsw.cached.yaml new file mode 100644 index 000000000..57acac5d9 --- /dev/null +++ b/src/main/resources/regression/beir-v1.0.0-scidocs.bge-base-en-v1.5.parquet.hnsw.cached.yaml @@ -0,0 +1,53 @@ +corpus: beir-v1.0.0-scidocs.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/scidocs.parquet + +index_path: indexes/parquet/scidocs +index_type: hnsw +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: -M 16 -efC 100 + +metrics: +- metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: JsonStringVector +topics: +- name: "BEIR (v1.0.0): SCIDOCS" + id: test + path: topics.beir-v1.0.0-scidocs.test.bge-base-en-v1.5.jsonl.gz + qrel: qrels.beir-v1.0.0-scidocs.test.txt + +models: +- name: bge-hnsw-cached + display: BGE-base-en-v1.5 + type: hnsw + params: -generator VectorQueryGenerator -topicField vector -removeQuery -threads + 16 -hits 1000 -efSearch 1000 + results: + nDCG@10: + - 0.2170 + R@100: + - 0.4959 + R@1000: + - 0.7824 diff --git a/src/main/resources/regression/beir-v1.0.0-scidocs.bge-base-en-v1.5.parquet.hnsw.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-scidocs.bge-base-en-v1.5.parquet.hnsw.onnx.yaml new file mode 100644 index 000000000..31f7c541c --- /dev/null +++ b/src/main/resources/regression/beir-v1.0.0-scidocs.bge-base-en-v1.5.parquet.hnsw.onnx.yaml @@ -0,0 +1,53 @@ +corpus: 
beir-v1.0.0-scidocs.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/scidocs.parquet + +index_path: indexes/parquet/scidocs +index_type: hnsw +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: -M 16 -efC 100 -memoryBuffer 65536 -noMerge + +metrics: +- metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: TsvString +topics: +- name: "BEIR (v1.0.0): SCIDOCS" + id: test + path: topics.beir-v1.0.0-scidocs.test.tsv.gz + qrel: qrels.beir-v1.0.0-scidocs.test.txt + +models: +- name: bge-hnsw-onnx + display: BGE-base-en-v1.5 + type: hnsw + params: -generator VectorQueryGenerator -topicField title -removeQuery -threads + 16 -hits 1000 -efSearch 1000 -encoder BgeBaseEn15 + results: + nDCG@10: + - 0.2170 + R@100: + - 0.4959 + R@1000: + - 0.7824 diff --git a/src/main/resources/regression/beir-v1.0.0-scifact.bge-base-en-v1.5.parquet.flat.cached.yaml b/src/main/resources/regression/beir-v1.0.0-scifact.bge-base-en-v1.5.parquet.flat.cached.yaml new file mode 100644 index 000000000..2b62dbaec --- /dev/null +++ b/src/main/resources/regression/beir-v1.0.0-scifact.bge-base-en-v1.5.parquet.flat.cached.yaml @@ -0,0 +1,53 @@ +corpus: beir-v1.0.0-scifact.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/scifact.parquet + +index_path: indexes/parquet/scifact +index_type: flat +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: "" + +metrics: +- metric: nDCG@10 + command: 
bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: JsonStringVector +topics: +- name: "BEIR (v1.0.0): SciFact" + id: test + path: topics.beir-v1.0.0-scifact.test.bge-base-en-v1.5.jsonl.gz + qrel: qrels.beir-v1.0.0-scifact.test.txt + +models: +- name: bge-flat-cached + display: BGE-base-en-v1.5 + type: flat + params: -generator VectorQueryGenerator -topicField vector -removeQuery + -threads 16 -hits 1000 + results: + nDCG@10: + - 0.7408 + R@100: + - 0.9667 + R@1000: + - 0.9967 diff --git a/src/main/resources/regression/beir-v1.0.0-scifact.bge-base-en-v1.5.parquet.flat.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-scifact.bge-base-en-v1.5.parquet.flat.onnx.yaml new file mode 100644 index 000000000..582e01ae3 --- /dev/null +++ b/src/main/resources/regression/beir-v1.0.0-scifact.bge-base-en-v1.5.parquet.flat.onnx.yaml @@ -0,0 +1,53 @@ +corpus: beir-v1.0.0-scifact.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/scifact.parquet + +index_path: indexes/parquet/scifact +index_type: flat +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: "" + +metrics: +- metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 +
can_combine: false + +topic_reader: TsvString +topics: +- name: "BEIR (v1.0.0): SciFact" + id: test + path: topics.beir-v1.0.0-scifact.test.tsv.gz + qrel: qrels.beir-v1.0.0-scifact.test.txt + +models: +- name: bge-flat-onnx + display: BGE-base-en-v1.5 + type: flat + params: -generator VectorQueryGenerator -topicField vector -removeQuery + -threads 16 -hits 1000 -encoder BgeBaseEn15 + results: + nDCG@10: + - 0.7408 + R@100: + - 0.9667 + R@1000: + - 0.9967 diff --git a/src/main/resources/regression/beir-v1.0.0-scifact.bge-base-en-v1.5.parquet.hnsw.cached.yaml b/src/main/resources/regression/beir-v1.0.0-scifact.bge-base-en-v1.5.parquet.hnsw.cached.yaml new file mode 100644 index 000000000..448a498e5 --- /dev/null +++ b/src/main/resources/regression/beir-v1.0.0-scifact.bge-base-en-v1.5.parquet.hnsw.cached.yaml @@ -0,0 +1,53 @@ +corpus: beir-v1.0.0-scifact.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/scifact.parquet + +index_path: indexes/parquet/scifact +index_type: hnsw +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: -M 16 -efC 100 + +metrics: +- metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: JsonStringVector +topics: +- name: "BEIR (v1.0.0): SciFact" + id: test + path: topics.beir-v1.0.0-scifact.test.bge-base-en-v1.5.jsonl.gz + qrel: qrels.beir-v1.0.0-scifact.test.txt + +models: +- name: bge-hnsw-cached + display: BGE-base-en-v1.5 + type: hnsw + params: -generator VectorQueryGenerator -topicField vector -removeQuery +
-threads 16 -hits 1000 -efSearch 1000 + results: + nDCG@10: + - 0.7408 + R@100: + - 0.9667 + R@1000: + - 0.9967 diff --git a/src/main/resources/regression/beir-v1.0.0-scifact.bge-base-en-v1.5.parquet.hnsw.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-scifact.bge-base-en-v1.5.parquet.hnsw.onnx.yaml new file mode 100644 index 000000000..b1fd40d0f --- /dev/null +++ b/src/main/resources/regression/beir-v1.0.0-scifact.bge-base-en-v1.5.parquet.hnsw.onnx.yaml @@ -0,0 +1,53 @@ +corpus: beir-v1.0.0-scifact.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/scifact.parquet + +index_path: indexes/parquet/scifact +index_type: hnsw +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: -M 16 -efC 100 -memoryBuffer 65536 -noMerge + +metrics: +- metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: TsvString +topics: +- name: "BEIR (v1.0.0): SciFact" + id: test + path: topics.beir-v1.0.0-scifact.test.tsv.gz + qrel: qrels.beir-v1.0.0-scifact.test.txt + +models: +- name: bge-hnsw-onnx + display: BGE-base-en-v1.5 + type: hnsw + params: -generator VectorQueryGenerator -topicField title -removeQuery + -threads 16 -hits 1000 -efSearch 1000 -encoder BgeBaseEn15 + results: + nDCG@10: + - 0.7408 + R@100: + - 0.9667 + R@1000: + - 0.9967 diff --git a/src/main/resources/regression/beir-v1.0.0-signal1m.bge-base-en-v1.5.parquet.flat.cached.yaml b/src/main/resources/regression/beir-v1.0.0-signal1m.bge-base-en-v1.5.parquet.flat.cached.yaml new file mode 100644 index
000000000..492abafd1 --- /dev/null +++ b/src/main/resources/regression/beir-v1.0.0-signal1m.bge-base-en-v1.5.parquet.flat.cached.yaml @@ -0,0 +1,53 @@ +corpus: beir-v1.0.0-signal1m.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/signal1m.parquet + +index_path: indexes/parquet/signal1m +index_type: flat +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: "" + +metrics: +- metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: JsonStringVector +topics: +- name: "BEIR (v1.0.0): Signal-1M" + id: test + path: topics.beir-v1.0.0-signal1m.test.bge-base-en-v1.5.jsonl.gz + qrel: qrels.beir-v1.0.0-signal1m.test.txt + +models: +- name: bge-flat-cached + display: BGE-base-en-v1.5 + type: flat + params: -generator VectorQueryGenerator -topicField vector -removeQuery -threads + 16 -hits 1000 + results: + nDCG@10: + - 0.2886 + R@100: + - 0.3112 + R@1000: + - 0.5331 diff --git a/src/main/resources/regression/beir-v1.0.0-signal1m.bge-base-en-v1.5.parquet.flat.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-signal1m.bge-base-en-v1.5.parquet.flat.onnx.yaml new file mode 100644 index 000000000..fa3a186d4 --- /dev/null +++ b/src/main/resources/regression/beir-v1.0.0-signal1m.bge-base-en-v1.5.parquet.flat.onnx.yaml @@ -0,0 +1,53 @@ +corpus: beir-v1.0.0-signal1m.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/signal1m.parquet + +index_path: indexes/parquet/signal1m +index_type: flat +collection_class: ParquetDenseVectorCollection +generator_class: 
ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: "" + +metrics: +- metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: TsvString +topics: +- name: "BEIR (v1.0.0): Signal-1M" + id: test + path: topics.beir-v1.0.0-signal1m.test.tsv.gz + qrel: qrels.beir-v1.0.0-signal1m.test.txt + +models: +- name: bge-flat-onnx + display: BGE-base-en-v1.5 + type: flat + params: -generator VectorQueryGenerator -topicField vector -removeQuery -threads + 16 -hits 1000 -encoder BgeBaseEn15 + results: + nDCG@10: + - 0.2886 + R@100: + - 0.3112 + R@1000: + - 0.5331 diff --git a/src/main/resources/regression/beir-v1.0.0-signal1m.bge-base-en-v1.5.parquet.hnsw.cached.yaml b/src/main/resources/regression/beir-v1.0.0-signal1m.bge-base-en-v1.5.parquet.hnsw.cached.yaml new file mode 100644 index 000000000..ff0686a07 --- /dev/null +++ b/src/main/resources/regression/beir-v1.0.0-signal1m.bge-base-en-v1.5.parquet.hnsw.cached.yaml @@ -0,0 +1,53 @@ +corpus: beir-v1.0.0-signal1m.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/signal1m.parquet + +index_path: indexes/parquet/signal1m +index_type: hnsw +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: -M 16 -efC 100 + +metrics: +- metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- 
metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: JsonStringVector +topics: +- name: "BEIR (v1.0.0): Signal-1M" + id: test + path: topics.beir-v1.0.0-signal1m.test.bge-base-en-v1.5.jsonl.gz + qrel: qrels.beir-v1.0.0-signal1m.test.txt + +models: +- name: bge-hnsw-cached + display: BGE-base-en-v1.5 + type: hnsw + params: -generator VectorQueryGenerator -topicField vector -removeQuery -threads + 16 -hits 1000 -efSearch 1000 + results: + nDCG@10: + - 0.2886 + R@100: + - 0.3112 + R@1000: + - 0.5331 diff --git a/src/main/resources/regression/beir-v1.0.0-signal1m.bge-base-en-v1.5.parquet.hnsw.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-signal1m.bge-base-en-v1.5.parquet.hnsw.onnx.yaml new file mode 100644 index 000000000..dd7e401a8 --- /dev/null +++ b/src/main/resources/regression/beir-v1.0.0-signal1m.bge-base-en-v1.5.parquet.hnsw.onnx.yaml @@ -0,0 +1,53 @@ +corpus: beir-v1.0.0-signal1m.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/signal1m.parquet + +index_path: indexes/parquet/signal1m +index_type: hnsw +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: -M 16 -efC 100 -memoryBuffer 65536 -noMerge + +metrics: +- metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: TsvString +topics: +- name: "BEIR (v1.0.0): Signal-1M" + id: test + path: topics.beir-v1.0.0-signal1m.test.tsv.gz + qrel: qrels.beir-v1.0.0-signal1m.test.txt + +models: +- name: 
bge-hnsw-onnx + display: BGE-base-en-v1.5 + type: hnsw + params: -generator VectorQueryGenerator -topicField title -removeQuery -threads + 16 -hits 1000 -efSearch 1000 -encoder BgeBaseEn15 + results: + nDCG@10: + - 0.2886 + R@100: + - 0.3112 + R@1000: + - 0.5331 diff --git a/src/main/resources/regression/beir-v1.0.0-trec-covid.bge-base-en-v1.5.parquet.flat.cached.yaml b/src/main/resources/regression/beir-v1.0.0-trec-covid.bge-base-en-v1.5.parquet.flat.cached.yaml new file mode 100644 index 000000000..0a52b0e81 --- /dev/null +++ b/src/main/resources/regression/beir-v1.0.0-trec-covid.bge-base-en-v1.5.parquet.flat.cached.yaml @@ -0,0 +1,53 @@ +corpus: beir-v1.0.0-trec-covid.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/trec-covid.parquet + +index_path: indexes/parquet/trec-covid +index_type: flat +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: "" + +metrics: +- metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: JsonStringVector +topics: +- name: "BEIR (v1.0.0): TREC-COVID" + id: test + path: topics.beir-v1.0.0-trec-covid.test.bge-base-en-v1.5.jsonl.gz + qrel: qrels.beir-v1.0.0-trec-covid.test.txt + +models: +- name: bge-flat-cached + display: BGE-base-en-v1.5 + type: flat + params: -generator VectorQueryGenerator -topicField vector -removeQuery -threads + 16 -hits 1000 + results: + nDCG@10: + - 0.7814 + R@100: + - 0.1406 + R@1000: + - 0.4768 diff --git a/src/main/resources/regression/beir-v1.0.0-trec-covid.bge-base-en-v1.5.parquet.flat.onnx.yaml 
b/src/main/resources/regression/beir-v1.0.0-trec-covid.bge-base-en-v1.5.parquet.flat.onnx.yaml new file mode 100644 index 000000000..e1ec6e4f8 --- /dev/null +++ b/src/main/resources/regression/beir-v1.0.0-trec-covid.bge-base-en-v1.5.parquet.flat.onnx.yaml @@ -0,0 +1,53 @@ +corpus: beir-v1.0.0-trec-covid.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/trec-covid.parquet + +index_path: indexes/parquet/trec-covid +index_type: flat +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: "" + +metrics: +- metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: TsvString +topics: +- name: "BEIR (v1.0.0): TREC-COVID" + id: test + path: topics.beir-v1.0.0-trec-covid.test.tsv.gz + qrel: qrels.beir-v1.0.0-trec-covid.test.txt + +models: +- name: bge-flat-onnx + display: BGE-base-en-v1.5 + type: flat + params: -generator VectorQueryGenerator -topicField vector -removeQuery -threads + 16 -hits 1000 -encoder BgeBaseEn15 + results: + nDCG@10: + - 0.7814 + R@100: + - 0.1406 + R@1000: + - 0.4768 diff --git a/src/main/resources/regression/beir-v1.0.0-trec-covid.bge-base-en-v1.5.parquet.hnsw.cached.yaml b/src/main/resources/regression/beir-v1.0.0-trec-covid.bge-base-en-v1.5.parquet.hnsw.cached.yaml new file mode 100644 index 000000000..c72f704fd --- /dev/null +++ b/src/main/resources/regression/beir-v1.0.0-trec-covid.bge-base-en-v1.5.parquet.hnsw.cached.yaml @@ -0,0 +1,53 @@ +corpus: beir-v1.0.0-trec-covid.bge-base-en-v1.5 +corpus_path: 
collections/beir-v1.0.0/bge-base-en-v1.5/trec-covid.parquet + +index_path: indexes/parquet/trec-covid +index_type: hnsw +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: -M 16 -efC 100 + +metrics: +- metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: JsonStringVector +topics: +- name: "BEIR (v1.0.0): TREC-COVID" + id: test + path: topics.beir-v1.0.0-trec-covid.test.bge-base-en-v1.5.jsonl.gz + qrel: qrels.beir-v1.0.0-trec-covid.test.txt + +models: +- name: bge-hnsw-cached + display: BGE-base-en-v1.5 + type: hnsw + params: -generator VectorQueryGenerator -topicField vector -removeQuery -threads + 16 -hits 1000 -efSearch 1000 + results: + nDCG@10: + - 0.7814 + R@100: + - 0.1406 + R@1000: + - 0.4768 diff --git a/src/main/resources/regression/beir-v1.0.0-trec-covid.bge-base-en-v1.5.parquet.hnsw.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-trec-covid.bge-base-en-v1.5.parquet.hnsw.onnx.yaml new file mode 100644 index 000000000..d2760726e --- /dev/null +++ b/src/main/resources/regression/beir-v1.0.0-trec-covid.bge-base-en-v1.5.parquet.hnsw.onnx.yaml @@ -0,0 +1,53 @@ +corpus: beir-v1.0.0-trec-covid.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/trec-covid.parquet + +index_path: indexes/parquet/trec-covid +index_type: hnsw +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: -M 16 -efC 100 -memoryBuffer 65536 -noMerge + +metrics: +- metric: nDCG@10 + command: bin/trec_eval 
+ params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: TsvString +topics: +- name: "BEIR (v1.0.0): TREC-COVID" + id: test + path: topics.beir-v1.0.0-trec-covid.test.tsv.gz + qrel: qrels.beir-v1.0.0-trec-covid.test.txt + +models: +- name: bge-hnsw-onnx + display: BGE-base-en-v1.5 + type: hnsw + params: -generator VectorQueryGenerator -topicField title -removeQuery -threads + 16 -hits 1000 -efSearch 1000 -encoder BgeBaseEn15 + results: + nDCG@10: + - 0.7814 + R@100: + - 0.1406 + R@1000: + - 0.4768 diff --git a/src/main/resources/regression/beir-v1.0.0-trec-news.bge-base-en-v1.5.parquet.flat.cached.yaml b/src/main/resources/regression/beir-v1.0.0-trec-news.bge-base-en-v1.5.parquet.flat.cached.yaml new file mode 100644 index 000000000..66622226b --- /dev/null +++ b/src/main/resources/regression/beir-v1.0.0-trec-news.bge-base-en-v1.5.parquet.flat.cached.yaml @@ -0,0 +1,53 @@ +corpus: beir-v1.0.0-trec-news.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/trec-news.parquet + +index_path: indexes/parquet/trec-news +index_type: flat +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: "" + +metrics: +- metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + 
metric_precision: 4 + can_combine: false + +topic_reader: JsonStringVector +topics: +- name: "BEIR (v1.0.0): TREC-NEWS" + id: test + path: topics.beir-v1.0.0-trec-news.test.bge-base-en-v1.5.jsonl.gz + qrel: qrels.beir-v1.0.0-trec-news.test.txt + +models: +- name: bge-flat-cached + display: BGE-base-en-v1.5 + type: flat + params: -generator VectorQueryGenerator -topicField vector -removeQuery -threads + 16 -hits 1000 + results: + nDCG@10: + - 0.4425 + R@100: + - 0.4992 + R@1000: + - 0.7875 diff --git a/src/main/resources/regression/beir-v1.0.0-trec-news.bge-base-en-v1.5.parquet.flat.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-trec-news.bge-base-en-v1.5.parquet.flat.onnx.yaml new file mode 100644 index 000000000..0ba98ad01 --- /dev/null +++ b/src/main/resources/regression/beir-v1.0.0-trec-news.bge-base-en-v1.5.parquet.flat.onnx.yaml @@ -0,0 +1,53 @@ +corpus: beir-v1.0.0-trec-news.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/trec-news.parquet + +index_path: indexes/parquet/trec-news +index_type: flat +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: "" + +metrics: +- metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: TsvString +topics: +- name: "BEIR (v1.0.0): TREC-NEWS" + id: test + path: topics.beir-v1.0.0-trec-news.test.tsv.gz + qrel: qrels.beir-v1.0.0-trec-news.test.txt + +models: +- name: bge-flat-onnx + display: BGE-base-en-v1.5 + type: flat + params: -generator VectorQueryGenerator -topicField title -removeQuery -threads + 16 -hits 1000 
-encoder BgeBaseEn15 + results: + nDCG@10: + - 0.4425 + R@100: + - 0.4992 + R@1000: + - 0.7875 diff --git a/src/main/resources/regression/beir-v1.0.0-trec-news.bge-base-en-v1.5.parquet.hnsw.cached.yaml b/src/main/resources/regression/beir-v1.0.0-trec-news.bge-base-en-v1.5.parquet.hnsw.cached.yaml new file mode 100644 index 000000000..3e6634e24 --- /dev/null +++ b/src/main/resources/regression/beir-v1.0.0-trec-news.bge-base-en-v1.5.parquet.hnsw.cached.yaml @@ -0,0 +1,53 @@ +corpus: beir-v1.0.0-trec-news.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/trec-news.parquet + +index_path: indexes/parquet/trec-news +index_type: hnsw +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: -M 16 -efC 100 + +metrics: +- metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: JsonStringVector +topics: +- name: "BEIR (v1.0.0): TREC-NEWS" + id: test + path: topics.beir-v1.0.0-trec-news.test.bge-base-en-v1.5.jsonl.gz + qrel: qrels.beir-v1.0.0-trec-news.test.txt + +models: +- name: bge-hnsw-cached + display: BGE-base-en-v1.5 + type: hnsw + params: -generator VectorQueryGenerator -topicField vector -removeQuery -threads + 16 -hits 1000 -efSearch 1000 + results: + nDCG@10: + - 0.4425 + R@100: + - 0.4992 + R@1000: + - 0.7875 diff --git a/src/main/resources/regression/beir-v1.0.0-trec-news.bge-base-en-v1.5.parquet.hnsw.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-trec-news.bge-base-en-v1.5.parquet.hnsw.onnx.yaml new file mode 100644 index 000000000..94f7848ea --- /dev/null +++ 
b/src/main/resources/regression/beir-v1.0.0-trec-news.bge-base-en-v1.5.parquet.hnsw.onnx.yaml @@ -0,0 +1,53 @@ +corpus: beir-v1.0.0-trec-news.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/trec-news.parquet + +index_path: indexes/parquet/trec-news +index_type: hnsw +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: -M 16 -efC 100 -memoryBuffer 65536 -noMerge + +metrics: +- metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: TsvString +topics: +- name: "BEIR (v1.0.0): TREC-NEWS" + id: test + path: topics.beir-v1.0.0-trec-news.test.tsv.gz + qrel: qrels.beir-v1.0.0-trec-news.test.txt + +models: +- name: bge-hnsw-onnx + display: BGE-base-en-v1.5 + type: hnsw + params: -generator VectorQueryGenerator -topicField title -removeQuery -threads + 16 -hits 1000 -efSearch 1000 -encoder BgeBaseEn15 + results: + nDCG@10: + - 0.4425 + R@100: + - 0.4992 + R@1000: + - 0.7875 diff --git a/src/main/resources/regression/beir-v1.0.0-webis-touche2020.bge-base-en-v1.5.parquet.flat.cached.yaml b/src/main/resources/regression/beir-v1.0.0-webis-touche2020.bge-base-en-v1.5.parquet.flat.cached.yaml new file mode 100644 index 000000000..34aefbaad --- /dev/null +++ b/src/main/resources/regression/beir-v1.0.0-webis-touche2020.bge-base-en-v1.5.parquet.flat.cached.yaml @@ -0,0 +1,53 @@ +corpus: beir-v1.0.0-webis-touche2020.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/webis-touche2020.parquet + +index_path: indexes/parquet/webis-touche2020 +index_type: flat 
+collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: "" + +metrics: +- metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: JsonStringVector +topics: +- name: "BEIR (v1.0.0): Webis-Touche2020" + id: test + path: topics.beir-v1.0.0-webis-touche2020.test.bge-base-en-v1.5.jsonl.gz + qrel: qrels.beir-v1.0.0-webis-touche2020.test.txt + +models: +- name: bge-flat-cached + display: BGE-base-en-v1.5 + type: flat + params: -generator VectorQueryGenerator -topicField vector -removeQuery -threads + 16 -hits 1000 + results: + nDCG@10: + - 0.2570 + R@100: + - 0.4857 + R@1000: + - 0.8298 diff --git a/src/main/resources/regression/beir-v1.0.0-webis-touche2020.bge-base-en-v1.5.parquet.flat.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-webis-touche2020.bge-base-en-v1.5.parquet.flat.onnx.yaml new file mode 100644 index 000000000..e1f820509 --- /dev/null +++ b/src/main/resources/regression/beir-v1.0.0-webis-touche2020.bge-base-en-v1.5.parquet.flat.onnx.yaml @@ -0,0 +1,53 @@ +corpus: beir-v1.0.0-webis-touche2020.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/webis-touche2020.parquet + +index_path: indexes/parquet/webis-touche2020 +index_type: flat +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: "" + +metrics: +- metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@100 + command: 
bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: TsvString +topics: +- name: "BEIR (v1.0.0): Webis-Touche2020" + id: test + path: topics.beir-v1.0.0-webis-touche2020.test.tsv.gz + qrel: qrels.beir-v1.0.0-webis-touche2020.test.txt + +models: +- name: bge-flat-onnx + display: BGE-base-en-v1.5 + type: flat + params: -generator VectorQueryGenerator -topicField title -removeQuery -threads + 16 -hits 1000 -encoder BgeBaseEn15 + results: + nDCG@10: + - 0.2570 + R@100: + - 0.4857 + R@1000: + - 0.8298 diff --git a/src/main/resources/regression/beir-v1.0.0-webis-touche2020.bge-base-en-v1.5.parquet.hnsw.cached.yaml b/src/main/resources/regression/beir-v1.0.0-webis-touche2020.bge-base-en-v1.5.parquet.hnsw.cached.yaml new file mode 100644 index 000000000..1db44bfd1 --- /dev/null +++ b/src/main/resources/regression/beir-v1.0.0-webis-touche2020.bge-base-en-v1.5.parquet.hnsw.cached.yaml @@ -0,0 +1,53 @@ +corpus: beir-v1.0.0-webis-touche2020.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/webis-touche2020.parquet + +index_path: indexes/parquet/webis-touche2020 +index_type: hnsw +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: -M 16 -efC 100 + +metrics: +- metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: JsonStringVector +topics: +- 
name: "BEIR (v1.0.0): Webis-Touche2020" + id: test + path: topics.beir-v1.0.0-webis-touche2020.test.bge-base-en-v1.5.jsonl.gz + qrel: qrels.beir-v1.0.0-webis-touche2020.test.txt + +models: +- name: bge-hnsw-cached + display: BGE-base-en-v1.5 + type: hnsw + params: -generator VectorQueryGenerator -topicField vector -removeQuery -threads + 16 -hits 1000 -efSearch 1000 + results: + nDCG@10: + - 0.2570 + R@100: + - 0.4857 + R@1000: + - 0.8298 diff --git a/src/main/resources/regression/beir-v1.0.0-webis-touche2020.bge-base-en-v1.5.parquet.hnsw.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-webis-touche2020.bge-base-en-v1.5.parquet.hnsw.onnx.yaml new file mode 100644 index 000000000..49f5fc444 --- /dev/null +++ b/src/main/resources/regression/beir-v1.0.0-webis-touche2020.bge-base-en-v1.5.parquet.hnsw.onnx.yaml @@ -0,0 +1,53 @@ +corpus: beir-v1.0.0-webis-touche2020.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/webis-touche2020.parquet + +index_path: indexes/parquet/webis-touche2020 +index_type: hnsw +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: -M 16 -efC 100 -memoryBuffer 65536 -noMerge + +metrics: +- metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false +- metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: TsvString +topics: +- name: "BEIR (v1.0.0): Webis-Touche2020" + id: test + path: topics.beir-v1.0.0-webis-touche2020.test.tsv.gz + qrel: qrels.beir-v1.0.0-webis-touche2020.test.txt + +models: +- name: bge-hnsw-onnx + display: BGE-base-en-v1.5 + type: hnsw + params: -generator VectorQueryGenerator 
-topicField title -removeQuery -threads + 16 -hits 1000 -efSearch 1000 -encoder BgeBaseEn15 + results: + nDCG@10: + - 0.2570 + R@100: + - 0.4857 + R@1000: + - 0.8298