Skip to content

Commit

Permalink
Add benchmark on collection load time (#204)
Browse files Browse the repository at this point in the history
* Recover collection from a snapshot
* Push init_time_ms values into postgres
* Collect telemetry separately
* Add dedicated benchmark strategy
* Use new benchmark-server-3
* Run in parallel
  • Loading branch information
tellet-q authored Sep 27, 2024
1 parent 6bab477 commit 5cea6f1
Show file tree
Hide file tree
Showing 8 changed files with 257 additions and 35 deletions.
77 changes: 77 additions & 0 deletions .github/workflows/continuous-benchmark.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,7 @@ jobs:
export ENGINE_NAME="qdrant-all-on-disk-scalar-q"
export DATASETS="random-768-100-tenants"
export BENCHMARK_STRATEGY="tenants"
export CONTAINER_MEM_LIMIT=160mb
# Benchmark the dev branch:
Expand All @@ -105,6 +106,82 @@ jobs:
- name: Fail job if any of the benches failed
if: steps.benches.outputs.failed == 'error' || steps.benches.outputs.failed == 'timeout'
run: exit 1
- name: Send Notification
if: failure() || cancelled()
uses: slackapi/slack-github-action@v1.26.0
with:
payload: |
{
"text": "CI tenants benchmarks run status: ${{ job.status }}",
"blocks": [
{
"type": "section",
"text": {
"type": "mrkdwn",
"text": "CI tenants benchmarks failed because of ${{ steps.benches.outputs.failed }}.\nView the results <${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}|here>"
}
}
]
}
env:
SLACK_WEBHOOK_URL: ${{ secrets.CI_ALERTS_CHANNEL_WEBHOOK_URL }}
SLACK_WEBHOOK_TYPE: INCOMING_WEBHOOK
runLoadTimeBenchmark:
runs-on: ubuntu-latest
needs: runBenchmark
if: ${{ always() }}
steps:
- uses: actions/checkout@v3
- uses: webfactory/ssh-agent@v0.8.0
with:
ssh-private-key: ${{ secrets.SSH_PRIVATE_KEY }}
- name: Benches
  id: benches
  # Secrets are handed over through the step environment instead of being
  # interpolated into the script text, so a value containing spaces or shell
  # metacharacters cannot break or alter the script.
  env:
    HCLOUD_TOKEN: ${{ secrets.HCLOUD_TOKEN }}
    POSTGRES_PASSWORD: ${{ secrets.POSTGRES_PASSWORD }}
    POSTGRES_HOST: ${{ secrets.POSTGRES_HOST }}
  run: |
    export SERVER_NAME="benchmark-server-3"
    bash -x tools/setup_ci.sh

    # From here on a failing or timed-out run must not abort the step, so
    # that the remaining datasets are still benchmarked.
    set +e

    # Benchmark collection load time
    export BENCHMARK_STRATEGY="collection-reload"

    # dataset name -> engine configuration name
    declare -A DATASET_TO_ENGINE
    # dataset name -> URL of the snapshot the collection is recovered from
    declare -A DATASET_TO_URL

    DATASET_TO_ENGINE["all-payloads-default"]="qdrant-continuous-benchmark-snapshot"
    DATASET_TO_ENGINE["all-payloads-on-disk"]="qdrant-continuous-benchmark-snapshot"
    DATASET_TO_ENGINE["all-payloads-default-sparse"]="qdrant-continuous-benchmark-snapshot"
    DATASET_TO_ENGINE["all-payloads-on-disk-sparse"]="qdrant-continuous-benchmark-snapshot"

    export STORAGE_URL="https://storage.googleapis.com/qdrant-benchmark-snapshots/all-payloads"
    DATASET_TO_URL["all-payloads-default"]="${STORAGE_URL}/benchmark-all-payloads-500k-768-default.snapshot"
    DATASET_TO_URL["all-payloads-on-disk"]="${STORAGE_URL}/benchmark-all-payloads-500k-768-on-disk.snapshot"
    DATASET_TO_URL["all-payloads-default-sparse"]="${STORAGE_URL}/benchmark-all-payloads-500k-sparse-default.snapshot"
    DATASET_TO_URL["all-payloads-on-disk-sparse"]="${STORAGE_URL}/benchmark-all-payloads-500k-sparse-on-disk.snapshot"

    for dataset in "${!DATASET_TO_ENGINE[@]}"; do
      export ENGINE_NAME="${DATASET_TO_ENGINE[$dataset]}"
      export DATASETS="$dataset"
      export SNAPSHOT_URL="${DATASET_TO_URL[$dataset]}"

      # Benchmark the dev branch:
      export QDRANT_VERSION=ghcr/dev
      timeout 30m bash -x tools/run_ci.sh

      # Benchmark the master branch:
      export QDRANT_VERSION=docker/master
      timeout 30m bash -x tools/run_ci.sh
    done

    set -e
- name: Fail job if any of the benches failed
if: steps.benches.outputs.failed == 'error' || steps.benches.outputs.failed == 'timeout'
run: exit 1
- name: Send Notification
if: failure() || cancelled()
uses: slackapi/slack-github-action@v1.26.0
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# Compose definition of a single Qdrant service backed by a named volume.
version: '3.7'

services:
  qdrant_bench:
    # The registry falls back to docker.io when CONTAINER_REGISTRY is unset;
    # QDRANT_VERSION is used as the image tag and has no default here.
    image: ${CONTAINER_REGISTRY:-docker.io}/qdrant/qdrant:${QDRANT_VERSION}
    container_name: qdrant-continuous
    environment:
      QDRANT_NUM_CPUS: 4
    ports:
      - "6333:6333"
      - "6334:6334"
    volumes:
      # Storage lives in the named volume declared at the bottom of the file.
      - qdrant_storage:/qdrant/storage
    logging:
      driver: "json-file"
      options:
        # Keep a single log file of at most 10m.
        max-file: 1
        max-size: 10m
    deploy:
      resources:
        limits:
          # Memory limit of the container; 25Gb unless CONTAINER_MEM_LIMIT is set.
          memory: ${CONTAINER_MEM_LIMIT:-25Gb}

volumes:
  qdrant_storage:
    name: "qdrant_storage"
4 changes: 4 additions & 0 deletions tools/qdrant_collect_stats.sh
Original file line number Diff line number Diff line change
Expand Up @@ -28,3 +28,7 @@ echo "$RSS_ANON_MEMORY_USAGE" > results/rss-anon-memory-usage-"${CURRENT_DATE}".
ROOT_API_RESPONSE=$(ssh -t "${SERVER_USERNAME}@${IP_OF_THE_SERVER}" "curl -s http://localhost:6333/")

echo "$ROOT_API_RESPONSE" > results/root-api-"${CURRENT_DATE}".json

# Fetch the telemetry report from the Qdrant instance on the server and store
# it next to the other collected stats.
# The URL is single-quoted for the remote shell: it contains `?`, which an
# unquoted word would expose to glob expansion on the remote side.
TELEMETRY_API_RESPONSE=$(ssh -t "${SERVER_USERNAME}@${IP_OF_THE_SERVER}" "curl -s 'http://localhost:6333/telemetry?details_level=10'")

echo "$TELEMETRY_API_RESPONSE" > results/telemetry-api-"${CURRENT_DATE}".json
12 changes: 10 additions & 2 deletions tools/run_ci.sh
Original file line number Diff line number Diff line change
Expand Up @@ -18,15 +18,23 @@ trap 'handle_term' TERM

# Script, that runs benchmark within the GitHub Actions CI environment

BENCHMARK_STRATEGY=${BENCHMARK_STRATEGY:-"default"}

SCRIPT=$(realpath "$0")
SCRIPT_PATH=$(dirname "$SCRIPT")

bash -x "${SCRIPT_PATH}/run_remote_benchmark.sh"

# Upload to postgres

#######################################
# Exports the paths of the newest result files that match the benchmark
# strategy (ls -t sorts by modification time, newest first).
# Globals:
#   BENCHMARK_STRATEGY (read)
#   TELEMETRY_API_RESPONSE_FILE (exported for "collection-reload")
#   SEARCH_RESULTS_FILE, UPLOAD_RESULTS_FILE (exported for any other strategy)
# Arguments:
#   None
#######################################
export_strategy_result_files() {
  if [[ "${BENCHMARK_STRATEGY:-}" == "collection-reload" ]]; then
    # The collection load benchmark produces no search/upload results, so
    # only the telemetry response is picked up.
    export TELEMETRY_API_RESPONSE_FILE=$(ls -t results/telemetry-api-*.json | head -n 1)
  else
    # any other strategies are considered to have search & upload results
    export SEARCH_RESULTS_FILE=$(ls -t results/*-search-*.json | head -n 1)
    export UPLOAD_RESULTS_FILE=$(ls -t results/*-upload-*.json | head -n 1)
  fi
}

export_strategy_result_files

export VM_RSS_MEMORY_USAGE_FILE=$(ls -t results/vm-rss-memory-usage-*.txt | head -n 1)
export RSS_ANON_MEMORY_USAGE_FILE=$(ls -t results/rss-anon-memory-usage-*.txt | head -n 1)
export ROOT_API_RESPONSE_FILE=$(ls -t results/root-api-*.json | head -n 1)
Expand Down
29 changes: 25 additions & 4 deletions tools/run_client_script.sh
Original file line number Diff line number Diff line change
Expand Up @@ -17,17 +17,38 @@ BENCH_CLIENT_NAME=${CLIENT_NAME:-"benchmark-client-1"}

IP_OF_THE_CLIENT=$(bash "${SCRIPT_PATH}/${CLOUD_NAME}/get_public_ip.sh" "$BENCH_CLIENT_NAME")

scp "${SCRIPT_PATH}/run_experiment.sh" "${SERVER_USERNAME}@${IP_OF_THE_CLIENT}:~/run_experiment.sh"

ENGINE_NAME=${ENGINE_NAME:-"qdrant-continuous-benchmark"}

DATASETS=${DATASETS:-"laion-small-clip"}

SNAPSHOT_URL=${SNAPSHOT_URL:-""}

PRIVATE_IP_OF_THE_SERVER=$(bash "${SCRIPT_PATH}/${CLOUD_NAME}/get_private_ip.sh" "$BENCH_SERVER_NAME")

RUN_EXPERIMENT="ENGINE_NAME=${ENGINE_NAME} DATASETS=${DATASETS} PRIVATE_IP_OF_THE_SERVER=${PRIVATE_IP_OF_THE_SERVER} EXPERIMENT_MODE=${EXPERIMENT_MODE} bash ~/run_experiment.sh"
# Copy the experiment script to the client machine and run it there over SSH,
# passing the experiment settings as environment assignments in front of the
# remote command.
if [[ "$EXPERIMENT_MODE" == "snapshot" ]]; then
  # The snapshot run uses its own copy of the script on the client.
  # NOTE(review): presumably so that it cannot clash with ~/run_experiment.sh
  # of another benchmark using the same client - confirm.
  scp "${SCRIPT_PATH}/run_experiment.sh" "${SERVER_USERNAME}@${IP_OF_THE_CLIENT}:~/run_experiment_snapshot.sh"

  RUN_EXPERIMENT="ENGINE_NAME=${ENGINE_NAME}"
  RUN_EXPERIMENT+=" DATASETS=${DATASETS}"
  RUN_EXPERIMENT+=" PRIVATE_IP_OF_THE_SERVER=${PRIVATE_IP_OF_THE_SERVER}"
  RUN_EXPERIMENT+=" EXPERIMENT_MODE=${EXPERIMENT_MODE}"
  RUN_EXPERIMENT+=" SNAPSHOT_URL=${SNAPSHOT_URL}"
  RUN_EXPERIMENT+=" bash ~/run_experiment_snapshot.sh"

  # Run the experiment exactly once, with the keep-alive settings chosen for
  # the snapshot run. A second invocation would recover the snapshot again.
  ssh -tt -o ServerAliveInterval=120 -o ServerAliveCountMax=10 "${SERVER_USERNAME}@${IP_OF_THE_CLIENT}" "${RUN_EXPERIMENT}"
else
  scp "${SCRIPT_PATH}/run_experiment.sh" "${SERVER_USERNAME}@${IP_OF_THE_CLIENT}:~/run_experiment.sh"

  RUN_EXPERIMENT="ENGINE_NAME=${ENGINE_NAME}"
  RUN_EXPERIMENT+=" DATASETS=${DATASETS}"
  RUN_EXPERIMENT+=" PRIVATE_IP_OF_THE_SERVER=${PRIVATE_IP_OF_THE_SERVER}"
  RUN_EXPERIMENT+=" EXPERIMENT_MODE=${EXPERIMENT_MODE}"
  RUN_EXPERIMENT+=" bash ~/run_experiment.sh"

  ssh -tt -o ServerAliveInterval=60 -o ServerAliveCountMax=3 "${SERVER_USERNAME}@${IP_OF_THE_CLIENT}" "${RUN_EXPERIMENT}"
fi

echo "Gather experiment results..."
result_files_arr=()
Expand Down
43 changes: 39 additions & 4 deletions tools/run_experiment.sh
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@ PRIVATE_IP_OF_THE_SERVER=${PRIVATE_IP_OF_THE_SERVER:-""}

EXPERIMENT_MODE=${EXPERIMENT_MODE:-"full"}

SNAPSHOT_URL=${SNAPSHOT_URL:-""}

if [[ -z "$ENGINE_NAME" ]]; then
echo "ENGINE_NAME is not set"
exit 1
Expand All @@ -27,13 +29,21 @@ if [[ -z "$PRIVATE_IP_OF_THE_SERVER" ]]; then
fi

# EXPERIMENT_MODE selects which stages of the experiment are executed below.
if [[ -z "$EXPERIMENT_MODE" ]]; then
  echo "EXPERIMENT_MODE is not set, possible values are: full | upload | search | snapshot"
  exit 1
fi

# Snapshot mode recovers the collection from SNAPSHOT_URL, so the URL is
# mandatory for that mode.
if [[ "$EXPERIMENT_MODE" == "snapshot" ]] && [[ -z "$SNAPSHOT_URL" ]]; then
  echo "EXPERIMENT_MODE is 'snapshot' but SNAPSHOT_URL is not set"
  exit 1
fi
#######################################
# Removes the benchmark containers and the benchmark image left over from a
# previous run. Nothing is removed in "snapshot" mode.
# NOTE(review): presumably snapshot mode must leave them alone because another
# benchmark may be using them on the same client - confirm.
# Arguments:
#   $1 - experiment mode
# Returns:
#   0 always; failures of the individual removals are deliberately ignored
#   because the containers/image may simply not exist.
#######################################
cleanup_previous_run() {
  local mode=$1

  if [[ "$mode" == "snapshot" ]]; then
    return 0
  fi

  docker container rm -f ci-benchmark-upload || true
  docker container rm -f ci-benchmark-search || true

  docker rmi --force qdrant/vector-db-benchmark:latest || true
}

cleanup_previous_run "$EXPERIMENT_MODE"

if [[ "$EXPERIMENT_MODE" == "full" ]] || [[ "$EXPERIMENT_MODE" == "upload" ]]; then
echo "EXPERIMENT_MODE=$EXPERIMENT_MODE"
Expand Down Expand Up @@ -63,3 +73,28 @@ if [[ "$EXPERIMENT_MODE" == "full" ]] || [[ "$EXPERIMENT_MODE" == "search" ]]; t
qdrant/vector-db-benchmark:latest \
python run.py --engines "${ENGINE_NAME}" --datasets "${DATASETS}" --host "${PRIVATE_IP_OF_THE_SERVER}" --no-skip-if-exists --skip-upload
fi


#######################################
# Polls the collection info endpoint until the collection reports the
# status "green".
# Arguments:
#   $1 - URL of the collection info endpoint
#   $2 - maximum number of polls (default: 6)
# Returns:
#   0 as soon as the status is "green", 1 if it never became green.
#######################################
wait_for_green_collection() {
  local url=$1
  local max_polls=${2:-6}
  local polls=0
  local status

  while (( polls < max_polls )); do
    status=$(curl -s "$url" | jq -r '.result.status')
    if [[ "$status" == "green" ]]; then
      return 0
    fi
    polls=$(( polls + 1 ))
    # Pause between polls, but do not waste a second after the last one.
    if (( polls < max_polls )); then
      sleep 1
    fi
  done

  return 1
}

# Snapshot mode: recover the "benchmark" collection from SNAPSHOT_URL and
# make sure the collection is ready afterwards.
if [[ "$EXPERIMENT_MODE" == "snapshot" ]]; then
  echo "EXPERIMENT_MODE=$EXPERIMENT_MODE"

  collection_url="http://${PRIVATE_IP_OF_THE_SERVER}:6333/collections/benchmark"

  # Let jq build the request body so that the URL is properly JSON-escaped.
  recover_payload=$(jq -cn --arg location "$SNAPSHOT_URL" '{location: $location}')

  curl -X PUT \
    "${collection_url}/snapshots/recover" \
    -H "Content-Type: application/json" \
    --data-raw "$recover_payload"

  if wait_for_green_collection "$collection_url"; then
    echo "Experiment stage: Done"
  else
    echo "Experiment interrupted: collection is not ready."
    exit 1
  fi
fi
38 changes: 33 additions & 5 deletions tools/run_remote_benchmark.sh
Original file line number Diff line number Diff line change
Expand Up @@ -31,11 +31,14 @@ trap 'cleanup' EXIT
#SERVER_NAME=$BENCH_CLIENT_NAME SERVER_TYPE='cpx11' bash -x "${SCRIPT_PATH}/${CLOUD_NAME}/create_and_install.sh"
#wait $SERVER_CREATION_PID

BENCHMARK_STRATEGY=${BENCHMARK_STRATEGY:-"default"}

SERVER_NAME=$BENCH_SERVER_NAME bash -x "${SCRIPT_PATH}/${CLOUD_NAME}/check_ssh_connection.sh"
SERVER_NAME=$BENCH_CLIENT_NAME bash -x "${SCRIPT_PATH}/${CLOUD_NAME}/check_ssh_connection.sh"

if [[ -z "${CONTAINER_MEM_LIMIT:-}" ]]; then
echo "CONTAINER_MEM_LIMIT is not set, run without memory limit"
case "$BENCHMARK_STRATEGY" in
"default")
echo "Default benchmark, no volume, no memory limit"

SERVER_CONTAINER_NAME=${SERVER_CONTAINER_NAME:-"qdrant-continuous-benchmarks"}

Expand All @@ -44,9 +47,14 @@ if [[ -z "${CONTAINER_MEM_LIMIT:-}" ]]; then
bash -x "${SCRIPT_PATH}/run_client_script.sh"

bash -x "${SCRIPT_PATH}/qdrant_collect_stats.sh" "$SERVER_CONTAINER_NAME"
;;
"tenants")
if [[ -z "${CONTAINER_MEM_LIMIT:-}" ]]; then
echo "Tenants benchmark, but CONTAINER_MEM_LIMIT is not set!"
exit 2
fi

else
echo "CONTAINER_MEM_LIMIT is set, run search with memory limit: ${CONTAINER_MEM_LIMIT}"
echo "Tenants benchmark, run search with memory limit: ${CONTAINER_MEM_LIMIT}"

SERVER_CONTAINER_NAME=${SERVER_CONTAINER_NAME:-"qdrant-continuous-benchmarks-with-volume"}

Expand All @@ -59,6 +67,26 @@ else
bash -x "${SCRIPT_PATH}/run_client_script.sh" "search"

bash -x "${SCRIPT_PATH}/qdrant_collect_stats.sh" "$SERVER_CONTAINER_NAME"
;;

"collection-reload")
  echo "Collection load time benchmark"

  SERVER_CONTAINER_NAME=${SERVER_CONTAINER_NAME:-"qdrant-continuous-benchmarks-snapshot"}

  # Start the server container that is backed by a volume.
  bash -x "${SCRIPT_PATH}/run_server_container_with_volume.sh" "$SERVER_CONTAINER_NAME"

  # Run the client script in "snapshot" mode.
  bash -x "${SCRIPT_PATH}/run_client_script.sh" "snapshot"

  # Start the server container a second time, now passing "25Gb" and
  # "continue".
  # NOTE(review): presumably this restarts the server on the existing volume
  # so that the collection is loaded from disk - confirm against
  # run_server_container_with_volume.sh.
  bash -x "${SCRIPT_PATH}/run_server_container_with_volume.sh" "$SERVER_CONTAINER_NAME" "25Gb" "continue"

  # Wait before collecting the stats of the restarted server.
  sleep 10

  bash -x "${SCRIPT_PATH}/qdrant_collect_stats.sh" "$SERVER_CONTAINER_NAME"
  ;;

*)
echo "Invalid BENCHMARK_STRATEGY value: $BENCHMARK_STRATEGY"
exit 1
;;
esac
Loading

0 comments on commit 5cea6f1

Please sign in to comment.