From 15d00ef9d251a88f214219e37323bab68eabae33 Mon Sep 17 00:00:00 2001
From: Jacky <18255193+kthui@users.noreply.github.com>
Date: Thu, 25 Jul 2024 16:28:26 -0700
Subject: [PATCH] Move decrement_ongoing_request_count to top to cover all
 exceptions

---
 src/model.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/model.py b/src/model.py
index c537364e..7f7010a8 100644
--- a/src/model.py
+++ b/src/model.py
@@ -343,6 +343,7 @@ async def generate(self, request):
         """
         response_sender = request.get_response_sender()
         self.ongoing_request_count += 1
+        decrement_ongoing_request_count = True
         try:
             request_id = random_uuid()
             prompt = pb_utils.get_input_tensor_by_name(
@@ -400,7 +401,6 @@ async def generate(self, request):
             response_iterator = await self.llm_engine.add_request(
                 request_id, prompt, sampling_params
             )
-            decrement_ongoing_request_count = True
             async for output in response_iterator:
                 if response_sender.is_cancelled():
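
Context for the one-line move: generate() increments self.ongoing_request_count on
entry and, judging by the flag's name and the commit subject, decrements it in a
finally block guarded by decrement_ongoing_request_count. Before this patch the
flag was only set after llm_engine.add_request() succeeded, so any exception raised
earlier in the try block (for example while reading the input tensors) skipped the
decrement and leaked the counter. Setting the flag before entering the try block
covers every exit path.

A minimal, self-contained sketch of the pattern, assuming the guarded-finally
structure described above; the Handler class and the request shape here are
illustrative stand-ins, not the actual src/model.py code:

    import asyncio


    class Handler:
        """Illustrative stand-in for the model class in src/model.py."""

        def __init__(self):
            self.ongoing_request_count = 0

        async def generate(self, request):
            self.ongoing_request_count += 1
            # The patch moves this assignment above the try block, so the
            # finally clause decrements the counter even when an exception
            # is raised early, before the engine is ever reached.
            decrement_ongoing_request_count = True
            try:
                # Hypothetical early failure, standing in for input parsing
                # (e.g. pb_utils.get_input_tensor_by_name raising).
                if "prompt" not in request:
                    raise ValueError("missing prompt")
                for token in request["prompt"].split():
                    yield token  # stand-in for streaming engine outputs
            finally:
                # Assumed structure of the existing cleanup: decrement only
                # while the flag is set, so other paths could opt out.
                if decrement_ongoing_request_count:
                    self.ongoing_request_count -= 1


    async def main():
        handler = Handler()
        try:
            async for _ in handler.generate({}):  # fails before any output
                pass
        except ValueError:
            pass
        # With the flag set before the try block, the counter returns to 0
        # even though the request failed during "input parsing".
        assert handler.ongoing_request_count == 0


    asyncio.run(main())

Using a flag rather than an unconditional decrement in the finally block presumably
lets other code paths hand off responsibility for the counter by clearing the flag;
the patch keeps that flexibility while closing the early-exception leak.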