-
Notifications
You must be signed in to change notification settings - Fork 183
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add micro benchmarks. #537
Changes from 11 commits
bfffb78
6da4eea
39f8830
c7e388d
1f0b464
be14447
5a4584a
8d242f8
4f22f3c
fcfd6a4
e2ccdd0
d65095d
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,55 @@ | ||
- [Benchmarks](#benchmarks) | ||
- [Start OpenSearch](#start-opensearch) | ||
- [Install Prerequisites](#install-prerequisites) | ||
- [Run Benchmarks](#run-benchmarks) | ||
|
||
## Benchmarks | ||
|
||
Python client benchmarks using [richbench](https://github.com/tonybaloney/rich-bench). | ||
|
||
### Start OpenSearch | ||
|
||
``` | ||
docker run -p 9200:9200 -e "discovery.type=single-node" opensearchproject/opensearch:latest | ||
``` | ||
|
||
### Install Prerequisites | ||
|
||
Install [poetry](https://python-poetry.org/docs/), then install package dependencies. | ||
|
||
``` | ||
poetry install | ||
``` | ||
|
||
Benchmarks use the code in this repository by specifying the dependency as `opensearch-py = { path = "..", develop=true, extras=["async"] }` in [pyproject.toml](pyproject.toml). | ||
|
||
### Run Benchmarks | ||
|
||
Run all available benchmarks as follows. | ||
|
||
``` | ||
$ poetry run richbench . --repeat 1 --times 1 | ||
Benchmarks, repeat=1, number=1 | ||
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━┳━━━━━━━━━┳━━━━━━━━━┳━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┓ | ||
┃ Benchmark ┃ Min ┃ Max ┃ Mean ┃ Min (+) ┃ Max (+) ┃ Mean (+) ┃ | ||
┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━╇━━━━━━━━━╇━━━━━━━━━╇━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━┩ | ||
│ 1 client vs. more clients (async) │ 1.640 │ 1.640 │ 1.640 │ 1.102 (1.5x) │ 1.102 (1.5x) │ 1.102 (1.5x) │ | ||
│ 1 thread vs. 32 threads (sync) │ 5.526 │ 5.526 │ 5.526 │ 1.626 (3.4x) │ 1.626 (3.4x) │ 1.626 (3.4x) │ | ||
│ 1 thread vs. 32 threads (sync) │ 4.639 │ 4.639 │ 4.639 │ 3.363 (1.4x) │ 3.363 (1.4x) │ 3.363 (1.4x) │ | ||
│ sync vs. async (8) │ 3.198 │ 3.198 │ 3.198 │ 0.966 (3.3x) │ 0.966 (3.3x) │ 0.966 (3.3x) │ | ||
└───────────────────────────────────┴─────────┴─────────┴─────────┴─────────────────┴─────────────────┴─────────────────┘ | ||
``` | ||
|
||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Could you please put the commands within a single code block for easier copying? The output can be placed in a separate block or outside of the code block. Thank you! There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Done. |
||
Run a specific benchmark, e.g. [bench_sync.py](bench_sync.py), by specifying `--benchmark [name]`. | ||
|
||
``` | ||
$ poetry run richbench . --repeat 1 --times 1 --benchmark sync | ||
Benchmarks, repeat=1, number=1 | ||
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━┳━━━━━━━━━┳━━━━━━━━━┳━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┓ | ||
┃ Benchmark ┃ Min ┃ Max ┃ Mean ┃ Min (+) ┃ Max (+) ┃ Mean (+) ┃ | ||
┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━╇━━━━━━━━━╇━━━━━━━━━╇━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━┩ | ||
│ 1 thread vs. 32 threads (sync) │ 6.804 │ 6.804 │ 6.804 │ 3.409 (2.0x) │ 3.409 (2.0x) │ 3.409 (2.0x) │ | ||
└────────────────────────────────┴─────────┴─────────┴─────────┴─────────────────┴─────────────────┴─────────────────┘ | ||
``` |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,101 @@ | ||
#!/usr/bin/env python | ||
|
||
# SPDX-License-Identifier: Apache-2.0 | ||
# | ||
# The OpenSearch Contributors require contributions made to | ||
# this file be licensed under the Apache-2.0 license or a | ||
# compatible open source license. | ||
|
||
import asyncio | ||
import uuid | ||
|
||
from opensearchpy import AsyncHttpConnection, AsyncOpenSearch | ||
|
||
# Connection settings for the OpenSearch node the benchmark talks to.
host, port = "localhost", 9200
# Credentials handed to the client as http_auth.
auth = ("admin", "admin")
# Index that test_async creates, fills and deletes on each run.
index_name = "test-index-async"
# Base document count; the test_N variants multiply it.
item_count = 100
|
||
|
||
async def index_records(client, item_count):
    """Index ``item_count`` copies of a fixed document concurrently.

    One ``client.index`` call is created per document, each with a fresh
    random UUID as its id, and all of them are awaited together through
    ``asyncio.gather``.

    Args:
        client: object whose ``index`` method returns an awaitable.
        item_count: number of documents to index into ``index_name``.
    """
    await asyncio.gather(
        *[
            client.index(
                index=index_name,
                body={
                    "title": "Moneyball",
                    "director": "Bennett Miller",
                    "year": "2011",
                },
                id=uuid.uuid4(),
            )
            for _ in range(item_count)
        ]
    )
|
||
|
||
async def test_async(client_count=1, item_count=1):
    """Index documents through ``client_count`` async clients in parallel.

    The first client (re)creates ``index_name``, every client then indexes
    ``item_count`` documents via ``index_records``, and finally the first
    client refreshes the index, prints the document count and deletes the
    index again.

    Args:
        client_count: number of ``AsyncOpenSearch`` clients to create; also
            used as each client's ``pool_maxsize``.
        item_count: number of documents indexed by each client.
    """
    clients = [
        AsyncOpenSearch(
            hosts=[{"host": host, "port": port}],
            http_auth=auth,
            use_ssl=True,
            verify_certs=False,
            ssl_show_warn=False,
            connection_class=AsyncHttpConnection,
            pool_maxsize=client_count,
        )
        for _ in range(client_count)
    ]

    try:
        # Index management always goes through the first client.
        admin = clients[0]

        # Start from an empty index so the printed count reflects this run.
        if await admin.indices.exists(index_name):
            await admin.indices.delete(index_name)
        await admin.indices.create(index_name)

        await asyncio.gather(
            *[index_records(client, item_count) for client in clients]
        )

        # Refresh before counting so the count call sees the new documents.
        await admin.indices.refresh(index=index_name)
        print(await admin.count(index=index_name))

        await admin.indices.delete(index_name)
    finally:
        # Close every client even when a step above fails, so connections
        # are not leaked between benchmark runs.
        await asyncio.gather(*[client.close() for client in clients])
|
||
|
||
def test(item_count=1, client_count=1):
    """Run ``test_async`` to completion on a newly created event loop.

    NOTE(review): the two arguments are forwarded positionally, so the
    value received here as ``item_count`` becomes ``test_async``'s
    ``client_count`` and the value received as ``client_count`` becomes
    its ``item_count``. The ``test_N`` functions call this as
    ``test(<clients>, <items>)`` and depend on that order; the parameter
    names are left unchanged for compatibility.
    """
    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)
    try:
        loop.run_until_complete(test_async(item_count, client_count))
    finally:
        # Close the loop even if the benchmark raises, so it is not leaked.
        loop.close()
|
||
|
||
# Benchmark variants. Each one passes test() a first argument N and a
# second argument scaled so that N times the second argument is always
# 32 * item_count.


def test_1():
    """Variant calling test(1, 32 * item_count)."""
    test(1, item_count * 32)


def test_2():
    """Variant calling test(2, 16 * item_count)."""
    test(2, item_count * 16)


def test_4():
    """Variant calling test(4, 8 * item_count)."""
    test(4, item_count * 8)


def test_8():
    """Variant calling test(8, 4 * item_count)."""
    test(8, item_count * 4)


def test_16():
    """Variant calling test(16, 2 * item_count)."""
    test(16, item_count * 2)


def test_32():
    """Variant calling test(32, item_count)."""
    test(32, item_count)


# Benchmark registration: a pair of callables and the label shown for them.
__benchmarks__ = [
    (test_1, test_8, "1 client vs. more clients (async)"),
]
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,93 @@ | ||
#!/usr/bin/env python | ||
|
||
# SPDX-License-Identifier: Apache-2.0 | ||
# | ||
# The OpenSearch Contributors require contributions made to | ||
# this file be licensed under the Apache-2.0 license or a | ||
# compatible open source license. | ||
|
||
import logging | ||
import sys | ||
import time | ||
|
||
from thread_with_return_value import ThreadWithReturnValue | ||
|
||
from opensearchpy import OpenSearch | ||
|
||
# Connection settings for the OpenSearch node the benchmark talks to.
host, port = "localhost", 9200
# Credentials handed to the client as http_auth.
auth = ("admin", "admin")
# Base number of requests; the test_N variants multiply it.
request_count = 250
|
||
|
||
# Route log records to stdout through a handler on the root logger.
formatter = logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s")
handler = logging.StreamHandler(sys.stdout)
handler.setFormatter(formatter)
handler.setLevel(logging.DEBUG)

root = logging.getLogger()
# The logger levels are left untouched; uncomment the two lines below to
# get debug output, including from urllib3's connection pool.
# root.setLevel(logging.DEBUG)
# logging.getLogger("urllib3.connectionpool").setLevel(logging.DEBUG)
root.addHandler(handler)
|
||
|
||
def get_info(client, request_count):
    """Call ``client.info()`` ``request_count`` times and time the calls.

    Args:
        client: object exposing an ``info()`` method.
        request_count: number of sequential calls to make.

    Returns:
        Sum of the individual call durations in milliseconds; 0 when
        ``request_count`` is 0.
    """
    total_ms = 0
    for _ in range(request_count):
        # perf_counter is monotonic, so a wall-clock adjustment during the
        # run cannot yield negative or inflated durations.
        started = time.perf_counter()
        client.info()
        total_ms += (time.perf_counter() - started) * 1000
    return total_ms
|
||
|
||
def test(thread_count=1, request_count=1, client_count=1):
    """Run ``get_info`` on ``thread_count`` threads and print total latency.

    Threads are assigned to clients round-robin
    (``thread_id % client_count``), so several threads share a client when
    there are fewer clients than threads.

    Args:
        thread_count: number of threads to start; also used as each
            client's ``pool_maxsize``.
        request_count: number of ``info()`` calls made by each thread.
        client_count: number of ``OpenSearch`` clients to create.
    """
    clients = [
        OpenSearch(
            hosts=[{"host": host, "port": port}],
            http_auth=auth,
            use_ssl=True,
            verify_certs=False,
            ssl_show_warn=False,
            pool_maxsize=thread_count,
        )
        for _ in range(client_count)
    ]

    try:
        threads = []
        for thread_id in range(thread_count):
            thread = ThreadWithReturnValue(
                target=get_info,
                args=[clients[thread_id % len(clients)], request_count],
            )
            threads.append(thread)
            thread.start()

        # join() is used for its return value here: each thread's get_info
        # result is added to the total.
        latency = 0
        for thread in threads:
            latency += thread.join()

        print(f"latency={latency}")
    finally:
        # Release the connections, as the async benchmark does for its
        # clients, so repeated runs do not accumulate open connections.
        for client in clients:
            client.close()
|
||
|
||
# Benchmark variants: test(threads, requests per thread, clients). The
# per-thread request count is scaled so every variant issues
# 32 * request_count requests in total.


def test_1():
    """1 thread, 1 client."""
    test(1, request_count * 32, 1)


def test_2():
    """2 threads, 2 clients."""
    test(2, request_count * 16, 2)


def test_4():
    """4 threads, 3 clients."""
    # NOTE(review): every other variant uses as many clients as threads;
    # confirm whether 3 is intentional here.
    test(4, request_count * 8, 3)


def test_8():
    """8 threads, 8 clients."""
    test(8, request_count * 4, 8)


def test_32():
    """32 threads, 32 clients."""
    test(32, request_count, 32)


# Benchmark registration: a pair of callables and the label shown for them.
__benchmarks__ = [
    (test_1, test_32, "1 thread vs. 32 threads (sync)"),
]
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@dblock, Please give complete paths for samples and benchmarks like it is for user_guide. To prevent link checker failure.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Changed it for this PR, however that feels incorrect because when you're on a branch, you end up navigating to
main
which may have content that has evolved, or even code moved which then will break the link. There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@dblock, I agree
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I opened #538. Note that even with the full link it doesn't pass checking because that URL won't exist until the code is merged.