Skip to content

Commit

Permalink
Added support for search pagination (search_after parameter) (#859)
Browse files Browse the repository at this point in the history
* Added a sample that uses search_after parameter

Signed-off-by: Nathalie Jonathan <[email protected]>

* Moved search_after sample to samples/search folder, updated CHANGELOG and _sync sample, and added _async sample.

Signed-off-by: Nathalie Jonathan <[email protected]>

* Solved conflicts in CHANGELOG.md

Signed-off-by: Nathalie Jonathan <[email protected]>

---------

Signed-off-by: Nathalie Jonathan <[email protected]>
  • Loading branch information
nathaliellenaa authored Dec 2, 2024
1 parent 6f761ab commit 87aebcd
Show file tree
Hide file tree
Showing 3 changed files with 222 additions and 0 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ Inspired from [Keep a Changelog](https://keepachangelog.com/en/1.0.0/)
## [Unreleased]
### Added
- Added option to pass custom headers to 'AWSV4SignerAsyncAuth' ([863](https://github.com/opensearch-project/opensearch-py/pull/863))
- Added sync and async samples that use the `search_after` parameter ([859](https://github.com/opensearch-project/opensearch-py/pull/859))
### Updated APIs
### Changed
### Deprecated
Expand Down
112 changes: 112 additions & 0 deletions samples/search/search_after_async.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
#!/usr/bin/env python

# SPDX-License-Identifier: Apache-2.0
#
# The OpenSearch Contributors require contributions made to
# this file be licensed under the Apache-2.0 license or a
# compatible open source license.
#
# Modifications Copyright OpenSearch Contributors. See
# GitHub history for details.

import asyncio
import os

from opensearchpy import AsyncOpenSearch


async def main() -> None:
    """
    Paginate through search results asynchronously with ``search_after``.

    This sample uses asyncio and AsyncOpenSearch to connect to a local
    OpenSearch cluster, creates a temporary ``movies`` index, fills it with
    110 documents, and then pages through the documents matching a query,
    10 at a time. Each page after the first is requested by passing the sort
    values of the previous page's last hit as the ``search_after`` parameter.

    The index is deleted and the client is closed before returning, even when
    an intermediate step fails.
    """

    # connect to OpenSearch
    host = "localhost"
    port = 9200
    auth = (
        "admin",
        os.getenv("OPENSEARCH_PASSWORD", "admin"),
    )  # For testing only. Don't store credentials in code.

    client = AsyncOpenSearch(
        hosts=[{"host": host, "port": port}],
        http_auth=auth,
        use_ssl=True,
        verify_certs=False,
        ssl_show_warn=False,
    )

    # The outer try guarantees the client is closed no matter which step
    # fails, including index creation and index deletion.
    try:
        # create an index
        await client.indices.create(index="movies")

        # The inner try only starts once the index was created by this run,
        # so a failed create never deletes an index this sample does not own.
        try:
            # add a dataset of 110 movies: 15 that match the query below ...
            for i in range(15):
                await client.index(
                    index="movies",
                    id=i,
                    body={
                        "title": f"The Dark Knight {i}",
                        "director": "Christopher Nolan",
                        "year": 2008 + i,
                    },
                )

            # ... and 95 that do not match it
            for i in range(95):
                await client.index(
                    index="movies",
                    id=i + 15,
                    body={
                        "title": f"Movie Title {i + 15}",
                        "director": f"Director {i + 15}",
                        "year": 1950 + i + 15,
                    },
                )

            # refresh the index to make the documents searchable
            await client.indices.refresh(index="movies")

            # define the search query with sorting and pagination options;
            # search_after relies on the "sort" clause to locate the next page
            search_body = {
                "query": {"match": {"title": "dark knight"}},
                "sort": [{"year": {"order": "asc"}}],
                "size": 10,
            }

            page = 1
            total_hits = 0
            while True:
                # execute the search
                response = await client.search(index="movies", body=search_body)
                hits = response["hits"]["hits"]

                # break if no more results
                if not hits:
                    break

                print(f"\nPage {page}:")

                for hit in hits:
                    print(hit)
                    total_hits += 1

                # get the sort values of the last document for the next page
                last_sort = hits[-1]["sort"]
                search_body["search_after"] = last_sort
                page += 1

            # "page" was advanced once past the last non-empty page
            print("\nPagination Summary:")
            print(f"Total pages: {page - 1}")
            print(f"Total hits: {total_hits}")
            print(f"Results per page: {search_body['size']}")
        finally:
            # delete the index
            await client.indices.delete(index="movies")
    finally:
        await client.close()


# Script entry point: run the async sample to completion when executed directly.
if __name__ == "__main__":
    asyncio.run(main())
109 changes: 109 additions & 0 deletions samples/search/search_after_sync.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
#!/usr/bin/env python

# SPDX-License-Identifier: Apache-2.0
#
# The OpenSearch Contributors require contributions made to
# this file be licensed under the Apache-2.0 license or a
# compatible open source license.
#
# Modifications Copyright OpenSearch Contributors. See
# GitHub history for details.

import os

from opensearchpy import OpenSearch


def main() -> None:
    """
    Paginate through search results with ``search_after``.

    This sample connects to a local OpenSearch cluster, creates a temporary
    ``movies`` index, fills it with 110 documents, and then pages through the
    documents matching a query, 10 at a time. Each page after the first is
    requested by passing the sort values of the previous page's last hit as
    the ``search_after`` parameter.

    The index is deleted and the client is closed before returning, even when
    an intermediate step fails.
    """

    # connect to OpenSearch
    host = "localhost"
    port = 9200
    auth = (
        "admin",
        os.getenv("OPENSEARCH_PASSWORD", "admin"),
    )  # For testing only. Don't store credentials in code.

    client = OpenSearch(
        hosts=[{"host": host, "port": port}],
        http_auth=auth,
        use_ssl=True,
        verify_certs=False,
        ssl_show_warn=False,
    )

    # The outer try guarantees the client's connections are released no
    # matter which step fails, including index creation and index deletion.
    try:
        # create an index
        client.indices.create(index="movies")

        # The inner try only starts once the index was created by this run,
        # so a failed create never deletes an index this sample does not own.
        try:
            # add a dataset of 110 movies: 15 that match the query below ...
            for i in range(15):
                client.index(
                    index="movies",
                    id=i,
                    body={
                        "title": f"The Dark Knight {i}",
                        "director": "Christopher Nolan",
                        "year": 2008 + i,
                    },
                )

            # ... and 95 that do not match it
            for i in range(95):
                client.index(
                    index="movies",
                    id=i + 15,
                    body={
                        "title": f"Movie Title {i + 15}",
                        "director": f"Director {i + 15}",
                        "year": 1950 + i + 15,
                    },
                )

            # refresh the index to make the documents searchable
            client.indices.refresh(index="movies")

            # define the search query with sorting and pagination options;
            # search_after relies on the "sort" clause to locate the next page
            search_body = {
                "query": {"match": {"title": "dark knight"}},
                "sort": [{"year": {"order": "asc"}}],
                "size": 10,
            }

            page = 1
            total_hits = 0
            while True:
                # execute the search
                response = client.search(index="movies", body=search_body)
                hits = response["hits"]["hits"]

                # break if no more results
                if not hits:
                    break

                print(f"\nPage {page}:")

                for hit in hits:
                    print(hit)
                    total_hits += 1

                # get the sort values of the last document for the next page
                last_sort = hits[-1]["sort"]
                search_body["search_after"] = last_sort
                page += 1

            # "page" was advanced once past the last non-empty page
            print("\nPagination Summary:")
            print(f"Total pages: {page - 1}")
            print(f"Total hits: {total_hits}")
            print(f"Results per page: {search_body['size']}")
        finally:
            # delete the index
            client.indices.delete(index="movies")
    finally:
        client.close()


# Script entry point: run the sample when this file is executed directly.
if __name__ == "__main__":
    main()

0 comments on commit 87aebcd

Please sign in to comment.