Skip to content

Commit

Permalink
perf: Upgrade vLLM version to 0.6.3.post1 (#76)
Browse files Browse the repository at this point in the history
Co-authored-by: Olga Andreeva <[email protected]>
  • Loading branch information
kthui and oandreeva-nv authored Dec 20, 2024
1 parent 0b9c8e2 commit 2f5bfbd
Show file tree
Hide file tree
Showing 6 changed files with 354 additions and 382 deletions.
2 changes: 2 additions & 0 deletions ci/L0_backend_vllm/metrics_test/test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -74,8 +74,10 @@ run_test() {
RET=1
fi
fi

set -e

# TODO: Non-graceful shutdown when metrics are enabled.
kill $SERVER_PID
wait $SERVER_PID
}
Expand Down
5 changes: 4 additions & 1 deletion ci/L0_backend_vllm/metrics_test/vllm_metrics_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -170,6 +170,7 @@ def test_vllm_metrics(self):
total_prompts,
)

# TODO: Revisit this test due to the removal of best_of
def test_custom_sampling_params(self):
# Adding sampling parameters for testing metrics.
# Definitions can be found here https://docs.vllm.ai/en/latest/dev/sampling_params.html
Expand All @@ -191,6 +192,7 @@ def test_custom_sampling_params(self):
total_prompts = len(self.prompts)

# vllm:request_params_best_of
"""
self.assertEqual(
metrics_dict["vllm:request_params_best_of_count"], total_prompts
)
Expand All @@ -200,9 +202,10 @@ def test_custom_sampling_params(self):
self.assertEqual(
metrics_dict["vllm:request_params_best_of_bucket"], total_prompts
)
"""
# vllm:request_params_n
self.assertEqual(metrics_dict["vllm:request_params_n_count"], total_prompts)
self.assertEqual(metrics_dict["vllm:request_params_n_sum"], n * total_prompts)
# self.assertEqual(metrics_dict["vllm:request_params_n_sum"], n * total_prompts)
self.assertEqual(metrics_dict["vllm:request_params_n_bucket"], total_prompts)

def test_vllm_metrics_disabled(self):
Expand Down
36 changes: 0 additions & 36 deletions ci/L0_check_health_vllm/mock_async_llm_engine.py

This file was deleted.

20 changes: 14 additions & 6 deletions ci/L0_check_health_vllm/test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -47,16 +47,24 @@ function enable_health_check {
echo -e "}" >> models/vllm_opt/config.pbtxt
}

VLLM_INSTALL_PATH="/usr/local/lib/python3.12/dist-packages/vllm"

function mock_vllm_async_llm_engine {
mv /opt/tritonserver/backends/vllm/model.py /opt/tritonserver/backends/vllm/.model.py.backup
cp /opt/tritonserver/backends/vllm/.model.py.backup /opt/tritonserver/backends/vllm/model.py
sed -i 's/from vllm.engine.async_llm_engine import AsyncLLMEngine/from mock_async_llm_engine import mock_AsyncLLMEngine as AsyncLLMEngine/' /opt/tritonserver/backends/vllm/model.py
cp mock_async_llm_engine.py /opt/tritonserver/backends/vllm
# backup original file
mv $VLLM_INSTALL_PATH/engine/multiprocessing/client.py $VLLM_INSTALL_PATH/engine/multiprocessing/client.py.backup
cp $VLLM_INSTALL_PATH/engine/multiprocessing/client.py.backup $VLLM_INSTALL_PATH/engine/multiprocessing/client.py
# overwrite the original check_health method
echo -e "" >> $VLLM_INSTALL_PATH/engine/multiprocessing/client.py
echo -e " async def check_health(self, check_count=[0]):" >> $VLLM_INSTALL_PATH/engine/multiprocessing/client.py
echo -e " check_count[0] += 1" >> $VLLM_INSTALL_PATH/engine/multiprocessing/client.py
echo -e " if check_count[0] > 1:" >> $VLLM_INSTALL_PATH/engine/multiprocessing/client.py
echo -e " raise RuntimeError(\"Simulated vLLM check_health() failure\")" >> $VLLM_INSTALL_PATH/engine/multiprocessing/client.py
}

function unmock_vllm_async_llm_engine {
rm -f /opt/tritonserver/backends/vllm/mock_async_llm_engine.py /opt/tritonserver/backends/vllm/model.py
mv /opt/tritonserver/backends/vllm/.model.py.backup /opt/tritonserver/backends/vllm/model.py
# restore from backup
rm -f $VLLM_INSTALL_PATH/engine/multiprocessing/client.py
mv $VLLM_INSTALL_PATH/engine/multiprocessing/client.py.backup $VLLM_INSTALL_PATH/engine/multiprocessing/client.py
}

function test_check_health {
Expand Down
Loading

0 comments on commit 2f5bfbd

Please sign in to comment.