Skip to content

Commit

Permalink
Support Filtering on Large List encoded by Bitmap (#14774)
Browse files Browse the repository at this point in the history
---------

Signed-off-by: bowenlan-amzn <[email protected]>
Signed-off-by: Michael Froh <[email protected]>
Co-authored-by: Michael Froh <[email protected]>
  • Loading branch information
bowenlan-amzn and msfroh authored Aug 20, 2024
1 parent adf3660 commit 52ecbe9
Show file tree
Hide file tree
Showing 15 changed files with 1,028 additions and 14 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
- Add took time to request nodes stats ([#15054](https://github.com/opensearch-project/OpenSearch/pull/15054))
- [Workload Management] Add Get QueryGroup API Logic ([14709](https://github.com/opensearch-project/OpenSearch/pull/14709))
- [Workload Management] QueryGroup resource tracking framework changes ([#13897](https://github.com/opensearch-project/OpenSearch/pull/13897))
- Support filtering on a large list encoded by bitmap ([#14774](https://github.com/opensearch-project/OpenSearch/pull/14774))
- Add slice execution listeners to SearchOperationListener interface ([#15153](https://github.com/opensearch-project/OpenSearch/pull/15153))

### Dependencies
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,184 @@
---
setup:
- skip:
version: " - 2.99.99"
reason: The bitmap filtering feature is available in 2.17 and later.
- do:
indices.create:
index: students
body:
settings:
number_of_shards: 1
number_of_replicas: 0
mappings:
properties:
student_id:
type: integer
- do:
bulk:
refresh: true
body:
- { "index": { "_index": "students", "_id": "1" } }
- { "name": "Jane Doe", "student_id": 111 }
- { "index": { "_index": "students", "_id": "2" } }
- { "name": "Mary Major", "student_id": 222 }
- { "index": { "_index": "students", "_id": "3" } }
- { "name": "John Doe", "student_id": 333 }
- do:
indices.create:
index: classes
body:
settings:
number_of_shards: 1
number_of_replicas: 0
mappings:
properties:
enrolled:
type: binary
store: true
- do:
bulk:
refresh: true
body:
- { "index": { "_index": "classes", "_id": "101" } }
- { "enrolled": "OjAAAAEAAAAAAAEAEAAAAG8A3gA=" } # 111,222
- { "index": { "_index": "classes", "_id": "102" } }
- { "enrolled": "OjAAAAEAAAAAAAAAEAAAAG8A" } # 111
- { "index": { "_index": "classes", "_id": "103" } }
- { "enrolled": "OjAAAAEAAAAAAAAAEAAAAE0B" } # 333
- { "index": { "_index": "classes", "_id": "104" } }
- { "enrolled": "OjAAAAEAAAAAAAEAEAAAAN4ATQE=" } # 222,333
- do:
cluster.health:
wait_for_status: green

---
"Terms lookup on a binary field with bitmap":
- do:
search:
rest_total_hits_as_int: true
index: students
body: {
"query": {
"terms": {
"student_id": {
"index": "classes",
"id": "101",
"path": "enrolled",
"store": true
},
"value_type": "bitmap"
}
}
}
- match: { hits.total: 2 }
- match: { hits.hits.0._source.name: Jane Doe }
- match: { hits.hits.0._source.student_id: 111 }
- match: { hits.hits.1._source.name: Mary Major }
- match: { hits.hits.1._source.student_id: 222 }

---
"Terms query accepting bitmap as value":
- do:
search:
rest_total_hits_as_int: true
index: students
body: {
"query": {
"terms": {
"student_id": ["OjAAAAEAAAAAAAEAEAAAAG8A3gA="],
"value_type": "bitmap"
}
}
}
- match: { hits.total: 2 }
- match: { hits.hits.0._source.name: Jane Doe }
- match: { hits.hits.0._source.student_id: 111 }
- match: { hits.hits.1._source.name: Mary Major }
- match: { hits.hits.1._source.student_id: 222 }

---
"Boolean must bitmap filtering":
- do:
search:
rest_total_hits_as_int: true
index: students
body: {
"query": {
"bool": {
"must": [
{
"terms": {
"student_id": {
"index": "classes",
"id": "101",
"path": "enrolled",
"store": true
},
"value_type": "bitmap"
}
}
],
"must_not": [
{
"terms": {
"student_id": {
"index": "classes",
"id": "104",
"path": "enrolled",
"store": true
},
"value_type": "bitmap"
}
}
]
}
}
}
- match: { hits.total: 1 }
- match: { hits.hits.0._source.name: Jane Doe }
- match: { hits.hits.0._source.student_id: 111 }

---
"Boolean should bitmap filtering":
- do:
search:
rest_total_hits_as_int: true
index: students
body: {
"query": {
"bool": {
"should": [
{
"terms": {
"student_id": {
"index": "classes",
"id": "101",
"path": "enrolled",
"store": true
},
"value_type": "bitmap"
}
},
{
"terms": {
"student_id": {
"index": "classes",
"id": "104",
"path": "enrolled",
"store": true
},
"value_type": "bitmap"
}
}
]
}
}
}
- match: { hits.total: 3 }
- match: { hits.hits.0._source.name: Mary Major }
- match: { hits.hits.0._source.student_id: 222 }
- match: { hits.hits.1._source.name: Jane Doe }
- match: { hits.hits.1._source.student_id: 111 }
- match: { hits.hits.2._source.name: John Doe }
- match: { hits.hits.2._source.student_id: 333 }
3 changes: 3 additions & 0 deletions server/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,9 @@ dependencies {
api "com.google.protobuf:protobuf-java:${versions.protobuf}"
api "jakarta.annotation:jakarta.annotation-api:${versions.jakarta_annotation}"

// https://mvnrepository.com/artifact/org.roaringbitmap/RoaringBitmap
implementation 'org.roaringbitmap:RoaringBitmap:1.1.0'

testImplementation(project(":test:framework")) {
// tests use the locally compiled version of server
exclude group: 'org.opensearch', module: 'server'
Expand Down
1 change: 1 addition & 0 deletions server/licenses/RoaringBitmap-1.1.0.jar.sha1
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
9607213861158ae7060234d93ee9c9cb19f494d1
Loading

0 comments on commit 52ecbe9

Please sign in to comment.