diff --git a/ci/L0_backend_vllm/metrics_test/vllm_metrics_test.py b/ci/L0_backend_vllm/metrics_test/vllm_metrics_test.py index fbe6675f..bea63ede 100644 --- a/ci/L0_backend_vllm/metrics_test/vllm_metrics_test.py +++ b/ci/L0_backend_vllm/metrics_test/vllm_metrics_test.py @@ -127,14 +127,14 @@ def test_vllm_metrics(self): # vllm:time_to_first_token_seconds self.assertEqual(metrics_dict["vllm:time_to_first_token_seconds_count"], 3) - self.assertTrue( - 0 < metrics_dict["vllm:time_to_first_token_seconds_sum"] < 0.0005 - ) + self.assertTrue(0 < metrics_dict["vllm:time_to_first_token_seconds_sum"] < 0.01) + self.assertEqual(metrics_dict["vllm:time_to_first_token_seconds_bucket"], 3) # vllm:time_per_output_token_seconds self.assertEqual(metrics_dict["vllm:time_per_output_token_seconds_count"], 45) self.assertTrue( - 0 <= metrics_dict["vllm:time_per_output_token_seconds_sum"] <= 0.005 + 0 < metrics_dict["vllm:time_per_output_token_seconds_sum"] < 0.1 ) + self.assertEqual(metrics_dict["vllm:time_per_output_token_seconds_bucket"], 45) def test_vllm_metrics_disabled(self): # Test vLLM metrics