From e7c8e7b2e52cf927da29b9d857746592222be0ff Mon Sep 17 00:00:00 2001
From: Yingge He <157551214+yinggeh@users.noreply.github.com>
Date: Thu, 15 Aug 2024 20:35:51 -0700
Subject: [PATCH] feat: Add vLLM counter metrics access through Triton (#7493)

Report vLLM counter metrics through the Triton server.
---
 build.py                   | 4 ++++
 docs/user_guide/metrics.md | 6 ++++++
 2 files changed, 10 insertions(+)

diff --git a/build.py b/build.py
index 2c95cbded3..0487636b09 100755
--- a/build.py
+++ b/build.py
@@ -1806,6 +1806,10 @@ def backend_clone(
         os.path.join(build_dir, be, "src", "model.py"),
         backend_dir,
     )
+    clone_script.cpdir(
+        os.path.join(build_dir, be, "src", "utils"),
+        backend_dir,
+    )
     clone_script.comment()
     clone_script.comment(f"end '{be}' backend")
 
diff --git a/docs/user_guide/metrics.md b/docs/user_guide/metrics.md
index 0a7f3cf1a3..b8fc0d8ee0 100644
--- a/docs/user_guide/metrics.md
+++ b/docs/user_guide/metrics.md
@@ -378,3 +378,9 @@ Further documentation can be found in the `TRITONSERVER_MetricFamily*` and
 The TRT-LLM backend uses the custom metrics API to track and expose specific metrics about
 LLMs, KV Cache, and Inflight Batching to Triton:
 https://github.com/triton-inference-server/tensorrtllm_backend?tab=readme-ov-file#triton-metrics
+
+### vLLM Backend Metrics
+
+The vLLM backend uses the custom metrics API to track and expose specific metrics about
+LLMs to Triton:
+https://github.com/triton-inference-server/vllm_backend?tab=readme-ov-file#triton-metrics
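
Note for reviewers (not part of the patch): below is a minimal sketch of how the vLLM counters exposed by this change could be inspected once a server built with it is running. The metrics port (8002), the `/metrics` path, and the `vllm:` metric-name prefix are assumptions drawn from Triton's default Prometheus metrics configuration and the linked vllm_backend README, not from this diff.

```python
# Hedged sketch: fetch Triton's Prometheus metrics endpoint and print the
# vLLM counter families. Port 8002 and the "vllm:" prefix are assumptions
# (Triton's default metrics port and the vLLM backend's metric naming).
import urllib.request

METRICS_URL = "http://localhost:8002/metrics"  # Triton default metrics endpoint

with urllib.request.urlopen(METRICS_URL) as resp:
    body = resp.read().decode("utf-8")

for line in body.splitlines():
    # Prometheus exposition format: "# HELP"/"# TYPE" comment lines describe a
    # metric family; sample lines look like '<name>{<labels>} <value>'.
    # Keep only lines belonging to vLLM metric families.
    if line.startswith("vllm:") or (line.startswith("#") and "vllm:" in line):
        print(line)
```

If the assumptions hold, running this against a patched server would list counter families such as prompt/generation token totals reported by the vLLM backend, which is a quick way to verify that the `utils` directory copied in `build.py` made it into the installed backend.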