Skip to content

Commit

Permalink
Merge branch 'main' of github.com:triton-inference-server/vllm_backend into jacky-cancel-thread
Browse files Browse the repository at this point in the history
  • Loading branch information
kthui committed Aug 6, 2024
2 parents 6b7e241 + a345a1d commit 4c19f87
Showing 1 changed file with 6 additions and 9 deletions.
15 changes: 6 additions & 9 deletions src/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -288,7 +288,6 @@ def response_loop(self):
if item is None:
break
response_state, response, response_flag = item
del item
response_sender = response_state["response_sender"]
try:
response_sender.send(response, response_flag)
Expand All @@ -302,10 +301,6 @@ def response_loop(self):
finally:
if response_flag == pb_utils.TRITONSERVER_RESPONSE_COMPLETE_FINAL:
self.ongoing_request_count -= 1
del response_state
del response_sender
if self.ongoing_request_count == 0:
gc.collect()

def create_response(self, vllm_output, prepend_input):
"""
Expand Down Expand Up @@ -474,10 +469,6 @@ async def generate(self, request):
finally:
if decrement_ongoing_request_count:
self.ongoing_request_count -= 1
del response_state
del response_sender
if self.ongoing_request_count == 0:
gc.collect()

def verify_loras(self, request):
# We will check if the requested lora exists here, if not we will send a
Expand Down Expand Up @@ -555,3 +546,9 @@ def finalize(self):
if self._response_thread is not None:
self._response_thread.join()
self._response_thread = None

# When using parallel tensors, the stub process may not shut down due to
# unreleased references, so manually run the garbage collector once.
self.logger.log_info("[vllm] Running Garbage Collector on finalize...")
gc.collect()
self.logger.log_info("[vllm] Garbage Collector on finalize... done")

0 comments on commit 4c19f87

Please sign in to comment.