From e7c8e7b2e52cf927da29b9d857746592222be0ff Mon Sep 17 00:00:00 2001
From: Yingge He <157551214+yinggeh@users.noreply.github.com>
Date: Thu, 15 Aug 2024 20:35:51 -0700
Subject: [PATCH] feat: Add vLLM counter metrics access through Triton (#7493)

Report vLLM counter metrics through the Triton server.
---
 build.py                   | 4 ++++
 docs/user_guide/metrics.md | 6 ++++++
 2 files changed, 10 insertions(+)

diff --git a/build.py b/build.py
index 2c95cbded3..0487636b09 100755
--- a/build.py
+++ b/build.py
@@ -1806,6 +1806,10 @@ def backend_clone(
         os.path.join(build_dir, be, "src", "model.py"),
         backend_dir,
     )
+    clone_script.cpdir(
+        os.path.join(build_dir, be, "src", "utils"),
+        backend_dir,
+    )
     clone_script.comment()
     clone_script.comment(f"end '{be}' backend")
 
diff --git a/docs/user_guide/metrics.md b/docs/user_guide/metrics.md
index 0a7f3cf1a3..b8fc0d8ee0 100644
--- a/docs/user_guide/metrics.md
+++ b/docs/user_guide/metrics.md
@@ -378,3 +378,9 @@ Further documentation can be found in the `TRITONSERVER_MetricFamily*` and
 The TRT-LLM backend uses the custom metrics API to track and expose specific metrics about
 LLMs, KV Cache, and Inflight Batching to Triton:
 https://github.com/triton-inference-server/tensorrtllm_backend?tab=readme-ov-file#triton-metrics
+
+### vLLM Backend Metrics
+
+The vLLM backend uses the custom metrics API to track and expose specific metrics about
+LLMs to Triton:
+https://github.com/triton-inference-server/vllm_backend?tab=readme-ov-file#triton-metrics
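
Note for reviewers (not part of the patch): below is a minimal sketch of how the vLLM counters exposed by this change could be inspected once a server built with it is running. The metrics port (8002), the `/metrics` path, and the `vllm:` metric-name prefix are assumptions drawn from Triton's default Prometheus metrics configuration and the linked vllm_backend README, not from this diff.

```python
# Hedged sketch: fetch Triton's Prometheus metrics endpoint and print the
# vLLM counter families. Port 8002 and the "vllm:" prefix are assumptions
# (Triton's default metrics port and the vLLM backend's metric naming).
import urllib.request

METRICS_URL = "http://localhost:8002/metrics"  # Triton default metrics endpoint

with urllib.request.urlopen(METRICS_URL) as resp:
    body = resp.read().decode("utf-8")

for line in body.splitlines():
    # Prometheus exposition format: "# HELP"/"# TYPE" comment lines describe a
    # metric family; sample lines look like '<name>{<labels>} <value>'.
    # Keep only lines belonging to vLLM metric families.
    if line.startswith("vllm:") or (line.startswith("#") and "vllm:" in line):
        print(line)
```

If the assumptions hold, running this against a patched server would list counter families such as prompt/generation token totals reported by the vLLM backend, which is a quick way to verify that the `utils` directory copied in `build.py` made it into the installed backend.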