From f116031337d4ace385eff9a3cca6ab0d4b47edad Mon Sep 17 00:00:00 2001
From: Matthew Kotila <matthew.r.kotila@gmail.com>
Date: Thu, 28 Sep 2023 00:36:17 +0000
Subject: [PATCH] Throw exception when request period is larger than max
 tokens instead of looping forever

---
 src/c++/perf_analyzer/periodic_concurrency_worker.cc | 6 ++++++
 src/c++/perf_analyzer/periodic_concurrency_worker.h  | 1 +
 2 files changed, 7 insertions(+)

diff --git a/src/c++/perf_analyzer/periodic_concurrency_worker.cc b/src/c++/perf_analyzer/periodic_concurrency_worker.cc
index 9fbaee3cc..630997903 100644
--- a/src/c++/perf_analyzer/periodic_concurrency_worker.cc
+++ b/src/c++/perf_analyzer/periodic_concurrency_worker.cc
@@ -53,8 +53,14 @@ PeriodicConcurrencyWorker::WorkerCallback(uint32_t infer_context_id)
   if (ctxs_.at(infer_context_id)->GetNumResponsesForCurrentRequest() ==
       request_period_) {
     period_completed_callback_();
+    period_completed_callback_called_ = true;
   }
   if (ctxs_.at(infer_context_id)->HasReceivedFinalResponse()) {
+    if (period_completed_callback_called_ == false) {
+      throw std::runtime_error(
+          "Request received final response before request period was reached. "
+          "Request period parameter must be less than or equal to max tokens.");
+    }
     request_completed_callback_();
   }
 }
diff --git a/src/c++/perf_analyzer/periodic_concurrency_worker.h b/src/c++/perf_analyzer/periodic_concurrency_worker.h
index 7242219b9..b5ee3887c 100644
--- a/src/c++/perf_analyzer/periodic_concurrency_worker.h
+++ b/src/c++/perf_analyzer/periodic_concurrency_worker.h
@@ -75,6 +75,7 @@ class PeriodicConcurrencyWorker : public ConcurrencyWorker {
   std::function<void()> request_completed_callback_{nullptr};
   std::function<void(uint32_t)> worker_callback_{std::bind(
       &PeriodicConcurrencyWorker::WorkerCallback, this, std::placeholders::_1)};
+  bool period_completed_callback_called_{false};
 };
 
 }}  // namespace triton::perfanalyzer