Skip to content

Commit

Permalink
Add YAML tests
Browse files Browse the repository at this point in the history
  • Loading branch information
carlosdelest committed Dec 2, 2024
1 parent 916ac83 commit 229ce2d
Show file tree
Hide file tree
Showing 6 changed files with 471 additions and 0 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,86 @@ setup:
# here we verify that our last hit is always the worst one
- match: { hits.hits.2._id: "1" }

---
# Sends the same query vector through three searches:
#   1. a plain approximate kNN search on bbq_hnsw,
#   2. the same kNN search with `rescore.oversample: 1.5`,
#   3. an exact search that scores every document with a script_score query.
# The rescored hits must equal the exact hits in both _id and _score.
"Vector rescoring has similar ordering as knn, same scoring as exact search for kNN section":
  # Every `do` step below sends an explicit Content-Type header.
  - skip:
      features: "headers"

  # Approximate kNN search (no rescoring); remember the hit order.
  - do:
      headers:
        Content-Type: application/json
      search:
        rest_total_hits_as_int: true
        index: bbq_hnsw
        body:
          knn:
            field: vector
            query_vector: [ 90.0, -10, 14.8, -156.0, -0.5, 90.0, -10, 14.8, -156.0, -0.5, 90.0, -10, 14.8, -156.0, -0.5, 90.0, -10, 14.8, -156.0, -0.5, 90.0, -10, 14.8, -156.0, -0.5, 90.0, -10, 14.8, -156.0, -0.5, 90.0, -10, 14.8, -156.0, -0.5, 90.0, -10, 14.8, -156.0, -0.5, 90.0, -10, 14.8, -156.0, -0.5, 90.0, -10, 14.8, -156.0, -0.5, 90.0, -10, 14.8, -156.0, -0.5, 90.0, -10, 14.8, -156.0, -0.5, 90.0, -10, 14.8, -156.0]
            k: 3
            num_candidates: 3

  - match: { hits.total: 3 }
  - set: { hits.hits.0._id: knn_id0 }
  - set: { hits.hits.1._id: knn_id1 }
  - set: { hits.hits.2._id: knn_id2 }

  # Rescore
  - do:
      headers:
        Content-Type: application/json
      search:
        rest_total_hits_as_int: true
        index: bbq_hnsw
        body:
          knn:
            field: vector
            query_vector: [ 90.0, -10, 14.8, -156.0, -0.5, 90.0, -10, 14.8, -156.0, -0.5, 90.0, -10, 14.8, -156.0, -0.5, 90.0, -10, 14.8, -156.0, -0.5, 90.0, -10, 14.8, -156.0, -0.5, 90.0, -10, 14.8, -156.0, -0.5, 90.0, -10, 14.8, -156.0, -0.5, 90.0, -10, 14.8, -156.0, -0.5, 90.0, -10, 14.8, -156.0, -0.5, 90.0, -10, 14.8, -156.0, -0.5, 90.0, -10, 14.8, -156.0, -0.5, 90.0, -10, 14.8, -156.0, -0.5, 90.0, -10, 14.8, -156.0]
            k: 3
            num_candidates: 3
            rescore:
              oversample: 1.5

  # Rescoring swaps the first two hits relative to the plain kNN ordering;
  # the last hit stays in place.
  - match: { hits.hits.0._id: $knn_id1 }
  - match: { hits.hits.1._id: $knn_id0 }
  - match: { hits.hits.2._id: $knn_id2 }

  # Get rescoring ids and scores for the comparison with exact search
  - match: { hits.total: 3 }
  - set: { hits.hits.0._id: rescore_id0 }
  - set: { hits.hits.1._id: rescore_id1 }
  - set: { hits.hits.2._id: rescore_id2 }
  - set: { hits.hits.0._score: rescore_score0 }
  - set: { hits.hits.1._score: rescore_score1 }
  - set: { hits.hits.2._score: rescore_score2 }

  # Exact knn via script score.
  # Scoped to the same index as the kNN searches so that documents in any
  # other index cannot change the hit count or ordering.
  # NOTE(review): the script scores as 1 / (1 + l2norm^2); presumably this
  # mirrors the similarity configured for `vector` in setup - confirm.
  - do:
      headers:
        Content-Type: application/json
      search:
        rest_total_hits_as_int: true
        index: bbq_hnsw
        body:
          query:
            script_score:
              query: { match_all: {} }
              script:
                source: "1.0 / (1.0 + Math.pow(l2norm(params.query_vector, 'vector'), 2.0))"
                params:
                  query_vector: [ 90.0, -10, 14.8, -156.0, -0.5, 90.0, -10, 14.8, -156.0, -0.5, 90.0, -10, 14.8, -156.0, -0.5, 90.0, -10, 14.8, -156.0, -0.5, 90.0, -10, 14.8, -156.0, -0.5, 90.0, -10, 14.8, -156.0, -0.5, 90.0, -10, 14.8, -156.0, -0.5, 90.0, -10, 14.8, -156.0, -0.5, 90.0, -10, 14.8, -156.0, -0.5, 90.0, -10, 14.8, -156.0, -0.5, 90.0, -10, 14.8, -156.0, -0.5, 90.0, -10, 14.8, -156.0, -0.5, 90.0, -10, 14.8, -156.0]

  # Check same ordering (which will not be true for larger datasets)
  # and scoring (which should be for the elements that are present in both)
  - match: { hits.total: 3 }
  - match: { hits.hits.0._id: $rescore_id0 }
  - match: { hits.hits.1._id: $rescore_id1 }
  - match: { hits.hits.2._id: $rescore_id2 }
  - match: { hits.hits.0._score: $rescore_score0 }
  - match: { hits.hits.1._score: $rescore_score1 }
  - match: { hits.hits.2._score: $rescore_score2 }

---
"Test bad quantization parameters":
- do:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -368,6 +368,85 @@ setup:
- match: {hits.hits.2._id: "1"}
- gte: {hits.hits.2._score: 0.78}
- lte: {hits.hits.2._score: 0.791}

---
# Won't be true for larger datasets, but this helps checking kNN vs rescoring vs exact search
#
# Sends the same query vector through three searches:
#   1. a `knn` query on hnsw_byte_quantized,
#   2. the same `knn` query with `rescore.oversample: 1.5`,
#   3. an exact search that scores every document with a script_score query.
# All three must return the same hit order, and the rescored scores must
# equal the exact scores.
# Note: although the test name says "kNN section", the requests below use
# the `knn` query nested under `query`.
"Vector rescoring has same ordering as knn, same scoring as exact search for kNN section":
  # Every `do` step below sends an explicit Content-Type header.
  - skip:
      features: "headers"

  # kNN search
  - do:
      headers:
        Content-Type: application/json
      search:
        rest_total_hits_as_int: true
        index: hnsw_byte_quantized
        body:
          size: 3
          query:
            knn:
              k: 3
              num_candidates: 3
              field: vector
              query_vector: [0.5, 111.3, -13.0, 14.8, -156.0]

  - match: { hits.total: 3 }
  - set: { hits.hits.0._id: knn_id0 }
  - set: { hits.hits.1._id: knn_id1 }
  - set: { hits.hits.2._id: knn_id2 }

  # Rescore
  - do:
      headers:
        Content-Type: application/json
      search:
        rest_total_hits_as_int: true
        index: hnsw_byte_quantized
        body:
          size: 3
          query:
            knn:
              k: 3
              num_candidates: 3
              field: vector
              query_vector: [0.5, 111.3, -13.0, 14.8, -156.0]
              rescore:
                oversample: 1.5

  # Check same ordering (which will not be true for larger datasets)
  - match: { hits.total: 3 }
  - match: { hits.hits.0._id: $knn_id0 }
  - match: { hits.hits.1._id: $knn_id1 }
  - match: { hits.hits.2._id: $knn_id2 }
  # Keep the rescored scores for the comparison with exact search
  - set: { hits.hits.0._score: rescore_score0 }
  - set: { hits.hits.1._score: rescore_score1 }
  - set: { hits.hits.2._score: rescore_score2 }

  # Exact knn via script score.
  # Scoped to the same index as the kNN searches so that documents in any
  # other index cannot change the hit count or ordering.
  # NOTE(review): the script scores as 1 / (1 + l2norm^2); presumably this
  # mirrors the similarity configured for `vector` in setup - confirm.
  - do:
      headers:
        Content-Type: application/json
      search:
        rest_total_hits_as_int: true
        index: hnsw_byte_quantized
        body:
          query:
            script_score:
              query: { match_all: {} }
              script:
                source: "1.0 / (1.0 + Math.pow(l2norm(params.query_vector, 'vector'), 2.0))"
                params:
                  query_vector: [0.5, 111.3, -13.0, 14.8, -156.0]

  # Check same ordering (which will not be true for larger datasets)
  # and scoring (which should be for the elements that are present in both)
  - match: { hits.total: 3 }
  - match: { hits.hits.0._id: $knn_id0 }
  - match: { hits.hits.1._id: $knn_id1 }
  - match: { hits.hits.2._id: $knn_id2 }
  - match: { hits.hits.0._score: $rescore_score0 }
  - match: { hits.hits.1._score: $rescore_score1 }
  - match: { hits.hits.2._score: $rescore_score2 }

---
"Test bad quantization parameters":
- do:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -549,6 +549,84 @@ setup:
- match: { hits.hits.1._id: "2"}
- match: { hits.hits.2._id: "3"}
---
# Sends the same query vector through three searches:
#   1. a plain approximate kNN search on hnsw_byte_quantized,
#   2. the same kNN search with `rescore.oversample: 1.5`,
#   3. an exact search that scores every document with a script_score query.
# Rescoring must keep the kNN hit order, and the rescored hits must equal
# the exact hits in both _id and _score.
"Vector rescoring has same ordering as knn, same scoring as exact search for kNN section":
  # Every `do` step below sends an explicit Content-Type header.
  - skip:
      features: "headers"

  # Approximate kNN search (no rescoring); remember the hit order.
  - do:
      headers:
        Content-Type: application/json
      search:
        rest_total_hits_as_int: true
        index: hnsw_byte_quantized
        body:
          fields: [ "name" ]
          knn:
            field: vector
            query_vector: [-0.5, 90.0, -10, 14.8]
            k: 3
            num_candidates: 3

  - match: { hits.total: 3 }
  - set: { hits.hits.0._id: knn_id0 }
  - set: { hits.hits.1._id: knn_id1 }
  - set: { hits.hits.2._id: knn_id2 }

  # Rescore
  - do:
      headers:
        Content-Type: application/json
      search:
        index: hnsw_byte_quantized
        rest_total_hits_as_int: true
        body:
          fields: [ "name" ]
          knn:
            field: vector
            query_vector: [-0.5, 90.0, -10, 14.8]
            k: 3
            num_candidates: 3
            rescore:
              oversample: 1.5

  # Comparing to knn search: rescoring must not change the hit order
  - match: { hits.hits.0._id: $knn_id0 }
  - match: { hits.hits.1._id: $knn_id1 }
  - match: { hits.hits.2._id: $knn_id2 }

  # Get rescoring ids and scores for the comparison with exact search
  - match: { hits.total: 3 }
  - set: { hits.hits.0._id: rescore_id0 }
  - set: { hits.hits.1._id: rescore_id1 }
  - set: { hits.hits.2._id: rescore_id2 }
  - set: { hits.hits.0._score: rescore_score0 }
  - set: { hits.hits.1._score: rescore_score1 }
  - set: { hits.hits.2._score: rescore_score2 }

  # Exact knn via script score.
  # Scoped to the same index as the kNN searches so that documents in any
  # other index cannot change the hit count or ordering.
  # NOTE(review): the script scores as 1 / (1 + l2norm^2); presumably this
  # mirrors the similarity configured for `vector` in setup - confirm.
  - do:
      headers:
        Content-Type: application/json
      search:
        rest_total_hits_as_int: true
        index: hnsw_byte_quantized
        body:
          query:
            script_score:
              query: { match_all: {} }
              script:
                source: "1.0 / (1.0 + Math.pow(l2norm(params.query_vector, 'vector'), 2.0))"
                params:
                  query_vector: [-0.5, 90.0, -10, 14.8]

  # Check same ordering (which will not be true for larger datasets)
  # and scoring (which should be for the elements that are present in both)
  - match: { hits.total: 3 }
  - match: { hits.hits.0._id: $rescore_id0 }
  - match: { hits.hits.1._id: $rescore_id1 }
  - match: { hits.hits.2._id: $rescore_id2 }
  - match: { hits.hits.0._score: $rescore_score0 }
  - match: { hits.hits.1._score: $rescore_score1 }
  - match: { hits.hits.2._score: $rescore_score2 }

---
"Test odd dimensions fail indexing":
- do:
catch: bad_request
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,86 @@ setup:
# here we verify that our last hit is always the worst one
- match: { hits.hits.2._id: "1" }
---
# Sends the same query vector through three searches:
#   1. a plain approximate kNN search on bbq_flat,
#   2. the same kNN search with `rescore.oversample: 1.5`,
#   3. an exact search that scores every document with a script_score query.
# The rescored hits must equal the exact hits in both _id and _score.
"Vector rescoring has similar ordering as knn, same scoring as exact search for kNN section":
  # Every `do` step below sends an explicit Content-Type header.
  - skip:
      features: "headers"

  # Approximate kNN search (no rescoring); remember the hit order.
  - do:
      headers:
        Content-Type: application/json
      search:
        rest_total_hits_as_int: true
        index: bbq_flat
        body:
          knn:
            field: vector
            query_vector: [ 90.0, -10, 14.8, -156.0, -0.5, 90.0, -10, 14.8, -156.0, -0.5, 90.0, -10, 14.8, -156.0, -0.5, 90.0, -10, 14.8, -156.0, -0.5, 90.0, -10, 14.8, -156.0, -0.5, 90.0, -10, 14.8, -156.0, -0.5, 90.0, -10, 14.8, -156.0, -0.5, 90.0, -10, 14.8, -156.0, -0.5, 90.0, -10, 14.8, -156.0, -0.5, 90.0, -10, 14.8, -156.0, -0.5, 90.0, -10, 14.8, -156.0, -0.5, 90.0, -10, 14.8, -156.0, -0.5, 90.0, -10, 14.8, -156.0]
            k: 3
            num_candidates: 3

  - match: { hits.total: 3 }
  - set: { hits.hits.0._id: knn_id0 }
  - set: { hits.hits.1._id: knn_id1 }
  - set: { hits.hits.2._id: knn_id2 }

  # Rescore
  - do:
      headers:
        Content-Type: application/json
      search:
        rest_total_hits_as_int: true
        index: bbq_flat
        body:
          knn:
            field: vector
            query_vector: [ 90.0, -10, 14.8, -156.0, -0.5, 90.0, -10, 14.8, -156.0, -0.5, 90.0, -10, 14.8, -156.0, -0.5, 90.0, -10, 14.8, -156.0, -0.5, 90.0, -10, 14.8, -156.0, -0.5, 90.0, -10, 14.8, -156.0, -0.5, 90.0, -10, 14.8, -156.0, -0.5, 90.0, -10, 14.8, -156.0, -0.5, 90.0, -10, 14.8, -156.0, -0.5, 90.0, -10, 14.8, -156.0, -0.5, 90.0, -10, 14.8, -156.0, -0.5, 90.0, -10, 14.8, -156.0, -0.5, 90.0, -10, 14.8, -156.0]
            k: 3
            num_candidates: 3
            rescore:
              oversample: 1.5

  # Rescoring swaps the first two hits relative to the plain kNN ordering;
  # the last hit stays in place.
  - match: { hits.hits.0._id: $knn_id1 }
  - match: { hits.hits.1._id: $knn_id0 }
  - match: { hits.hits.2._id: $knn_id2 }

  # Get rescoring ids and scores for the comparison with exact search
  - match: { hits.total: 3 }
  - set: { hits.hits.0._id: rescore_id0 }
  - set: { hits.hits.1._id: rescore_id1 }
  - set: { hits.hits.2._id: rescore_id2 }
  - set: { hits.hits.0._score: rescore_score0 }
  - set: { hits.hits.1._score: rescore_score1 }
  - set: { hits.hits.2._score: rescore_score2 }

  # Exact knn via script score.
  # Scoped to the same index as the kNN searches so that documents in any
  # other index cannot change the hit count or ordering.
  # NOTE(review): the script scores as 1 / (1 + l2norm^2); presumably this
  # mirrors the similarity configured for `vector` in setup - confirm.
  - do:
      headers:
        Content-Type: application/json
      search:
        rest_total_hits_as_int: true
        index: bbq_flat
        body:
          query:
            script_score:
              query: { match_all: {} }
              script:
                source: "1.0 / (1.0 + Math.pow(l2norm(params.query_vector, 'vector'), 2.0))"
                params:
                  query_vector: [ 90.0, -10, 14.8, -156.0, -0.5, 90.0, -10, 14.8, -156.0, -0.5, 90.0, -10, 14.8, -156.0, -0.5, 90.0, -10, 14.8, -156.0, -0.5, 90.0, -10, 14.8, -156.0, -0.5, 90.0, -10, 14.8, -156.0, -0.5, 90.0, -10, 14.8, -156.0, -0.5, 90.0, -10, 14.8, -156.0, -0.5, 90.0, -10, 14.8, -156.0, -0.5, 90.0, -10, 14.8, -156.0, -0.5, 90.0, -10, 14.8, -156.0, -0.5, 90.0, -10, 14.8, -156.0, -0.5, 90.0, -10, 14.8, -156.0]

  # Check same ordering (which will not be true for larger datasets)
  # and scoring (which should be for the elements that are present in both)
  - match: { hits.total: 3 }
  - match: { hits.hits.0._id: $rescore_id0 }
  - match: { hits.hits.1._id: $rescore_id1 }
  - match: { hits.hits.2._id: $rescore_id2 }
  - match: { hits.hits.0._score: $rescore_score0 }
  - match: { hits.hits.1._score: $rescore_score1 }
  - match: { hits.hits.2._score: $rescore_score2 }

---
"Test bad parameters":
- do:
catch: bad_request
Expand Down
Loading

0 comments on commit 229ce2d

Please sign in to comment.